Bug 1499026 - Part 2: Update in-tree ICU to release 63.1. rs=Waldo

--HG--
rename : intl/icu/source/common/ulistformatter.cpp => intl/icu/source/i18n/ulistformatter.cpp
This commit is contained in:
André Bargull 2018-11-01 06:32:25 -07:00
Родитель 7820c86808
Коммит 148e9c75e6
975 изменённых файлов: 50930 добавлений и 13997 удалений

11
config/external/icu/common/sources.mozbuild поставляемый
Просмотреть файл

@ -10,6 +10,7 @@ SOURCES += [
'/intl/icu/source/common/bytestriebuilder.cpp',
'/intl/icu/source/common/bytestrieiterator.cpp',
'/intl/icu/source/common/caniter.cpp',
'/intl/icu/source/common/characterproperties.cpp',
'/intl/icu/source/common/chariter.cpp',
'/intl/icu/source/common/charstr.cpp',
'/intl/icu/source/common/cmemory.cpp',
@ -25,7 +26,6 @@ SOURCES += [
'/intl/icu/source/common/filterednormalizer2.cpp',
'/intl/icu/source/common/icudataver.cpp',
'/intl/icu/source/common/icuplug.cpp',
'/intl/icu/source/common/listformatter.cpp',
'/intl/icu/source/common/loadednormalizer2impl.cpp',
'/intl/icu/source/common/locavailable.cpp',
'/intl/icu/source/common/locbased.cpp',
@ -115,6 +115,7 @@ SOURCES += [
'/intl/icu/source/common/ucnvscsu.cpp',
'/intl/icu/source/common/ucnvsel.cpp',
'/intl/icu/source/common/ucol_swp.cpp',
'/intl/icu/source/common/ucptrie.cpp',
'/intl/icu/source/common/ucurr.cpp',
'/intl/icu/source/common/udata.cpp',
'/intl/icu/source/common/udatamem.cpp',
@ -127,12 +128,12 @@ SOURCES += [
'/intl/icu/source/common/uinvchar.cpp',
'/intl/icu/source/common/uiter.cpp',
'/intl/icu/source/common/ulist.cpp',
'/intl/icu/source/common/ulistformatter.cpp',
'/intl/icu/source/common/uloc.cpp',
'/intl/icu/source/common/uloc_keytype.cpp',
'/intl/icu/source/common/uloc_tag.cpp',
'/intl/icu/source/common/umapfile.cpp',
'/intl/icu/source/common/umath.cpp',
'/intl/icu/source/common/umutablecptrie.cpp',
'/intl/icu/source/common/umutex.cpp',
'/intl/icu/source/common/unames.cpp',
'/intl/icu/source/common/unifiedcache.cpp',
@ -181,6 +182,7 @@ SOURCES += [
'/intl/icu/source/common/utrie.cpp',
'/intl/icu/source/common/utrie2.cpp',
'/intl/icu/source/common/utrie2_builder.cpp',
'/intl/icu/source/common/utrie_swap.cpp',
'/intl/icu/source/common/uts46.cpp',
'/intl/icu/source/common/utypes.cpp',
'/intl/icu/source/common/uvector.cpp',
@ -209,7 +211,6 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/icudataver.h',
'/intl/icu/source/common/unicode/icuplug.h',
'/intl/icu/source/common/unicode/idna.h',
'/intl/icu/source/common/unicode/listformatter.h',
'/intl/icu/source/common/unicode/localpointer.h',
'/intl/icu/source/common/unicode/locdspnm.h',
'/intl/icu/source/common/unicode/locid.h',
@ -247,6 +248,8 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/ucnv_err.h',
'/intl/icu/source/common/unicode/ucnvsel.h',
'/intl/icu/source/common/unicode/uconfig.h',
'/intl/icu/source/common/unicode/ucpmap.h',
'/intl/icu/source/common/unicode/ucptrie.h',
'/intl/icu/source/common/unicode/ucurr.h',
'/intl/icu/source/common/unicode/udata.h',
'/intl/icu/source/common/unicode/udisplaycontext.h',
@ -254,10 +257,10 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/uidna.h',
'/intl/icu/source/common/unicode/uiter.h',
'/intl/icu/source/common/unicode/uldnames.h',
'/intl/icu/source/common/unicode/ulistformatter.h',
'/intl/icu/source/common/unicode/uloc.h',
'/intl/icu/source/common/unicode/umachine.h',
'/intl/icu/source/common/unicode/umisc.h',
'/intl/icu/source/common/unicode/umutablecptrie.h',
'/intl/icu/source/common/unicode/unifilt.h',
'/intl/icu/source/common/unicode/unifunct.h',
'/intl/icu/source/common/unicode/unimatch.h',

Двоичные данные
config/external/icu/data/icudt62l.dat → config/external/icu/data/icudt63l.dat поставляемый

Двоичный файл не отображается.

8
config/external/icu/i18n/sources.mozbuild поставляемый
Просмотреть файл

@ -67,6 +67,7 @@ SOURCES += [
'/intl/icu/source/i18n/dtitvinf.cpp',
'/intl/icu/source/i18n/dtptngen.cpp',
'/intl/icu/source/i18n/dtrule.cpp',
'/intl/icu/source/i18n/erarules.cpp',
'/intl/icu/source/i18n/esctrn.cpp',
'/intl/icu/source/i18n/ethpccal.cpp',
'/intl/icu/source/i18n/fmtable.cpp',
@ -83,6 +84,7 @@ SOURCES += [
'/intl/icu/source/i18n/inputext.cpp',
'/intl/icu/source/i18n/islamcal.cpp',
'/intl/icu/source/i18n/japancal.cpp',
'/intl/icu/source/i18n/listformatter.cpp',
'/intl/icu/source/i18n/measfmt.cpp',
'/intl/icu/source/i18n/measunit.cpp',
'/intl/icu/source/i18n/measure.cpp',
@ -129,6 +131,8 @@ SOURCES += [
'/intl/icu/source/i18n/numparse_stringsegment.cpp',
'/intl/icu/source/i18n/numparse_symbols.cpp',
'/intl/icu/source/i18n/numparse_validators.cpp',
'/intl/icu/source/i18n/numrange_fluent.cpp',
'/intl/icu/source/i18n/numrange_impl.cpp',
'/intl/icu/source/i18n/numsys.cpp',
'/intl/icu/source/i18n/olsontz.cpp',
'/intl/icu/source/i18n/persncal.cpp',
@ -196,6 +200,7 @@ SOURCES += [
'/intl/icu/source/i18n/udatpg.cpp',
'/intl/icu/source/i18n/ufieldpositer.cpp',
'/intl/icu/source/i18n/uitercollationiterator.cpp',
'/intl/icu/source/i18n/ulistformatter.cpp',
'/intl/icu/source/i18n/ulocdata.cpp',
'/intl/icu/source/i18n/umsg.cpp',
'/intl/icu/source/i18n/unesctrn.cpp',
@ -250,12 +255,14 @@ EXPORTS.unicode += [
'/intl/icu/source/i18n/unicode/fpositer.h',
'/intl/icu/source/i18n/unicode/gender.h',
'/intl/icu/source/i18n/unicode/gregocal.h',
'/intl/icu/source/i18n/unicode/listformatter.h',
'/intl/icu/source/i18n/unicode/measfmt.h',
'/intl/icu/source/i18n/unicode/measunit.h',
'/intl/icu/source/i18n/unicode/measure.h',
'/intl/icu/source/i18n/unicode/msgfmt.h',
'/intl/icu/source/i18n/unicode/nounit.h',
'/intl/icu/source/i18n/unicode/numberformatter.h',
'/intl/icu/source/i18n/unicode/numberrangeformatter.h',
'/intl/icu/source/i18n/unicode/numfmt.h',
'/intl/icu/source/i18n/unicode/numsys.h',
'/intl/icu/source/i18n/unicode/plurfmt.h',
@ -292,6 +299,7 @@ EXPORTS.unicode += [
'/intl/icu/source/i18n/unicode/ufieldpositer.h',
'/intl/icu/source/i18n/unicode/uformattable.h',
'/intl/icu/source/i18n/unicode/ugender.h',
'/intl/icu/source/i18n/unicode/ulistformatter.h',
'/intl/icu/source/i18n/unicode/ulocdata.h',
'/intl/icu/source/i18n/unicode/umsg.h',
'/intl/icu/source/i18n/unicode/unirepl.h',

Просмотреть файл

@ -1,7 +1,7 @@
commit 4a3ba8eee90ea1414d4f7ee36563e6c9b28fda96
Author: Yoshito Umaoka <y.umaoka@gmail.com>
Date: Wed Jun 20 05:34:56 2018 +0000
commit 6cbd62e59e30f73b444be89ea71fd74275ac53a4
Author: Shane Carr <shane@unicode.org>
Date: Mon Oct 29 23:52:44 2018 -0700
ICU-13823 Merged #13840 number parser memory overflow fix (r41541) to maint-62 for 62.1 GA.
ICU-20246 Fixing another integer overflow in number parsing.
X-SVN-Rev: 41542
(cherry picked from commit 53d8c8f3d181d87a6aa925b449b51c4a2c922a51)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -48,6 +48,8 @@ ALL_PKGCONFIG_SUFFIX=uc i18n
DOXYGEN = @DOXYGEN@
DOCZIP = icu-docs.zip
INSTALL_ICU_CONFIG = @INSTALL_ICU_CONFIG@
## Files to remove for 'make clean'
CLEANFILES = *~
@ -64,7 +66,9 @@ SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) $(DATASUBDIR) $(EX
SECTION = 1
ifeq ($(INSTALL_ICU_CONFIG),true)
MANX_FILES = config/icu-config.$(SECTION)
endif
ALL_MAN_FILES = $(MANX_FILES)
@ -191,13 +195,15 @@ install-icu: $(INSTALLED_BUILT_FILES)
@$(MKINSTALLDIRS) $(DESTDIR)$(libdir)/pkgconfig
$(INSTALL_DATA) $(ALL_PKGCONFIG_FILES) $(DESTDIR)$(libdir)/pkgconfig/
$(INSTALL_DATA) $(top_srcdir)/../LICENSE $(DESTDIR)$(pkgdatadir)/LICENSE
ifeq ($(INSTALL_ICU_CONFIG),true)
$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
endif
$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
$(INSTALL_DATA) $(top_builddir)/config/pkgdata.inc $(DESTDIR)$(pkglibdir)/pkgdata.inc
# @echo icuinfo.xml is built after make check.
# -$(INSTALL_DATA) $(top_builddir)/config/icuinfo.xml $(DESTDIR)$(pkglibdir)/icuinfo.xml
cd $(DESTDIR)$(pkglibdir)/..; \
$(RM) current && ln -s $(VERSION) current; \
$(RMV) current && ln -s $(VERSION) current; \
$(RM) Makefile.inc && ln -s current/Makefile.inc Makefile.inc; \
$(RM) pkgdata.inc && ln -s current/pkgdata.inc pkgdata.inc
@ -354,7 +360,9 @@ config.status: $(srcdir)/configure $(srcdir)/common/unicode/uvernum.h
install-manx: $(MANX_FILES)
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
ifneq ($(MANX_FILES),)
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
endif
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
cd $(top_builddir) \

Просмотреть файл

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2018 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set configurations that are common to *all* ICU library code (common, i18n, and io). -->
<!-- Note: These options are for *all* configurations for *all* library projects. -->
<ItemDefinitionGroup>
<ClCompile>
<!-- ICU does not use exceptions in library code. -->
<PreprocessorDefinitions>
_HAS_EXCEPTIONS=0;
%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<PropertyGroup>
<!-- Disable MSBuild warning about Linker OutputFile. -->
<!-- Example: MSBuild complains that the common project creates a version-suffixed DLL (e.g. "icuuc62.dll") rather than "common.dll". However, this is intentional. -->
<MSBuildWarningsAsMessages>MSB8012</MSBuildWarningsAsMessages>
</PropertyGroup>
</Project>

Просмотреть файл

@ -1,129 +1,129 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set default configuration options for all non-UWP Visual Studio projects. -->
<!-- These are the default project configurations for building. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup>
<!-- This is the version of the MSVC tool-set to use. -->
<!-- v140 is the Visual Studio 2015 toolset. -->
<!-- v141 is the Visual Studio 2017 toolset. -->
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup>
<!-- This is the default SDK target. -->
<!-- Note that the Windows 8.1 SDK is backwards compatible down-level to Windows 7, so
setting this to 8.1 does not actually imply targeting Windows 8.1. -->
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<PropertyGroup>
<!-- We need to explicitly set the target version to Windows 7. -->
<Win32_WinNTVersion>0x0601</Win32_WinNTVersion>
</PropertyGroup>
<!-- Options that are common to *all* configurations for *all* projects. -->
<ItemDefinitionGroup>
<Midl>
<MkTypLibCompatible>true</MkTypLibCompatible>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Midl>
<ClCompile>
<!-- Note: These preprocessor defines are for *all* configurations for *all* projects. -->
<!-- Note: See ticket #5750 for the macro '_CRT_SECURE_NO_DEPRECATE'. -->
<PreprocessorDefinitions>
WINVER=$(Win32_WinNTVersion);
_WIN32_WINNT=$(Win32_WinNTVersion);
_CRT_SECURE_NO_DEPRECATE;
%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<!-- We always want to treat wchar_t as a "real" C++ type, instead of a typedef. -->
<TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
<SuppressStartupBanner>true</SuppressStartupBanner>
<!-- Set the source encoding and runtime encoding to UTF-8 by default. -->
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<!-- Enable parallel compilation for faster builds. -->
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<ResourceCompile>
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 'Release' configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<Midl>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 'Debug' configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<Midl>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>Disabled</Optimization>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<BufferSecurityCheck>true</BufferSecurityCheck>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 32-bit configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<Midl>
<TargetEnvironment>Win32</TargetEnvironment>
</Midl>
<ClCompile>
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 64-bit configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<PreprocessorDefinitions>WIN64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<TargetMachine>MachineX64</TargetMachine>
</Link>
</ItemDefinitionGroup>
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set default configuration options for all non-UWP Visual Studio projects. -->
<!-- These are the default project configurations for building. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup>
<!-- This is the version of the MSVC tool-set to use. -->
<!-- v140 is the Visual Studio 2015 toolset. -->
<!-- v141 is the Visual Studio 2017 toolset. -->
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup>
<!-- This is the default SDK target. -->
<!-- Note that the Windows 8.1 SDK is backwards compatible down-level to Windows 7, so
setting this to 8.1 does not actually imply targeting Windows 8.1. -->
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<PropertyGroup>
<!-- We need to explicitly set the target version to Windows 7. -->
<Win32_WinNTVersion>0x0601</Win32_WinNTVersion>
</PropertyGroup>
<!-- Options that are common to *all* configurations for *all* projects. -->
<ItemDefinitionGroup>
<Midl>
<MkTypLibCompatible>true</MkTypLibCompatible>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Midl>
<ClCompile>
<!-- Note: These preprocessor defines are for *all* configurations for *all* projects. -->
<!-- Note: See ticket #5750 for the macro '_CRT_SECURE_NO_DEPRECATE'. -->
<PreprocessorDefinitions>
WINVER=$(Win32_WinNTVersion);
_WIN32_WINNT=$(Win32_WinNTVersion);
_CRT_SECURE_NO_DEPRECATE;
%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<!-- We always want to treat wchar_t as a "real" C++ type, instead of a typedef. -->
<TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
<SuppressStartupBanner>true</SuppressStartupBanner>
<!-- Set the source encoding and runtime encoding to UTF-8 by default. -->
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<!-- Enable parallel compilation for faster builds. -->
<MultiProcessorCompilation>true</MultiProcessorCompilation>
</ClCompile>
<ResourceCompile>
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 'Release' configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<Midl>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 'Debug' configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<Midl>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>Disabled</Optimization>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<BufferSecurityCheck>true</BufferSecurityCheck>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 32-bit configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<Midl>
<TargetEnvironment>Win32</TargetEnvironment>
</Midl>
<ClCompile>
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<TargetMachine>MachineX86</TargetMachine>
</Link>
</ItemDefinitionGroup>
<!-- Options that are common to all 64-bit configurations for *all* projects. -->
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<PreprocessorDefinitions>WIN64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<TargetMachine>MachineX64</TargetMachine>
</Link>
</ItemDefinitionGroup>
</Project>

Просмотреть файл

@ -1,41 +1,41 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set common configuration options for all *_uwp projects. -->
<PropertyGroup>
<!-- If not already set, use this version of the Win10 SDK -->
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<!-- If not already set, set the minimum Win10 SDK version to TH1/RTM -->
<WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
<MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
<AppContainerApplication>true</AppContainerApplication>
<ApplicationType>Windows Store</ApplicationType>
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
</PropertyGroup>
<PropertyGroup>
<!-- This is the version of the MSVC tool-set to use. -->
<!-- v141 is the Visual Studio 2017 toolset. -->
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<ItemDefinitionGroup>
<Midl>
<PreprocessorDefinitions>
%(PreprocessorDefinitions)
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>
%(PreprocessorDefinitions);
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>
%(PreprocessorDefinitions)
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</ResourceCompile>
</ItemDefinitionGroup>
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- This file is used to set common configuration options for all *_uwp projects. -->
<PropertyGroup>
<!-- If not already set, use this version of the Win10 SDK -->
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<!-- If not already set, set the minimum Win10 SDK version to TH1/RTM -->
<WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
<MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
<AppContainerApplication>true</AppContainerApplication>
<ApplicationType>Windows Store</ApplicationType>
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
</PropertyGroup>
<PropertyGroup>
<!-- This is the version of the MSVC tool-set to use. -->
<!-- v141 is the Visual Studio 2017 toolset. -->
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<ItemDefinitionGroup>
<Midl>
<PreprocessorDefinitions>
%(PreprocessorDefinitions)
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</Midl>
<ClCompile>
<PreprocessorDefinitions>
%(PreprocessorDefinitions);
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>
%(PreprocessorDefinitions)
U_PLATFORM_HAS_WINUWP_API=1;
</PreprocessorDefinitions>
</ResourceCompile>
</ItemDefinitionGroup>
</Project>

Просмотреть файл

@ -1,27 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<!--
This file is used to copy all of the header files (*.h) from a project's "unicode" folder to a common output folder.
-->
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<!-- This is the location of the common output folder. -->
<CopyDestionationPath>$(SolutionDir)\..\..\include\unicode</CopyDestionationPath>
<BuildDependsOn>
$(BuildDependsOn);
CopyUnicodeHeaderFiles;
</BuildDependsOn>
</PropertyGroup>
<Target Name="CopyUnicodeHeaderFiles">
<ItemGroup>
<!-- Generate a list of all files that end in .h from the 'unicode' folder, relative to the current project. -->
<OutputFiles Include=".\unicode\**\*.h" />
</ItemGroup>
<!-- This message will be logged in the project's build output. -->
<Message Text="Copying @(OutputFiles->Count()) header files to $(CopyDestionationPath). Files copied: @(OutputFiles)" Importance="high"/>
<!-- Perform the copy. -->
<Copy SourceFiles="@(OutputFiles)"
DestinationFolder="$(CopyDestionationPath)\%(RecursiveDir)"
SkipUnchangedFiles="false"></Copy>
</Target>
<?xml version="1.0" encoding="utf-8"?>
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
<!--
This file is used to copy all of the header files (*.h) from a project's "unicode" folder to a common output folder.
-->
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<!-- This is the location of the common output folder. -->
<CopyDestionationPath>$(SolutionDir)\..\..\include\unicode</CopyDestionationPath>
<BuildDependsOn>
$(BuildDependsOn);
CopyUnicodeHeaderFiles;
</BuildDependsOn>
</PropertyGroup>
<Target Name="CopyUnicodeHeaderFiles">
<ItemGroup>
<!-- Generate a list of all files that end in .h from the 'unicode' folder, relative to the current project. -->
<OutputFiles Include=".\unicode\**\*.h" />
</ItemGroup>
<!-- This message will be logged in the project's build output. -->
<Message Text="Copying @(OutputFiles->Count()) header files to $(CopyDestionationPath). Files copied: @(OutputFiles)" Importance="high"/>
<!-- Perform the copy. -->
<Copy SourceFiles="@(OutputFiles)"
DestinationFolder="$(CopyDestionationPath)\%(RecursiveDir)"
SkipUnchangedFiles="false"></Copy>
</Target>
</Project>

Просмотреть файл

@ -52,7 +52,7 @@ set ICUFAILCNT=0
@echo ==== %THT% =========================================================================
%ICUINFO_CMD% %ICUINFO_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_icuinfo
@ -63,7 +63,7 @@ set ICUFAILCNT=0
@cd %ICU_ICUDIR%\source\test\intltest
%INTLTEST_CMD% %INTLTEST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_intltest
@ -74,7 +74,7 @@ set ICUFAILCNT=0
@cd %ICU_ICUDIR%\source\test\iotest
%IOTEST_CMD% %IOTEST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_IOTEST
@ -85,7 +85,7 @@ set ICUFAILCNT=0
@cd %ICU_ICUDIR%\source\test\cintltst
%CINTLTST_CMD% %CINTLTST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_cintltst
@ -97,7 +97,7 @@ set ICUFAILCNT=0
@REM @cd %ICU_ICUDIR%\source\test\letest
@REM %LETST_CMD% %LETEST_OPTS%
@REM @IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@REM @IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
@REM @set ICUFAILED=%ICUFAILED% %THT%
@REM @set ICUFAILCNT=1
@REM :OK_letest

Просмотреть файл

@ -81,7 +81,7 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o \
uinit.o uobject.o cmemory.o charstr.o cstr.o \
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o utrie_swap.o ucol_swp.o utrace.o \
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
@ -100,15 +100,17 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o characterproperties.o \
ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o uscript_props.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \
bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
ulist.o uloc_tag.o icudataver.o icuplug.o \
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
ubiditransform.o \
pluralmap.o \

Просмотреть файл

@ -241,13 +241,13 @@ void BMPSet::overrideIllegal() {
bmpBlockBits[i]|=bits;
}
mask=~(0x10001<<0xd); // Lead byte 0xED.
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
bits=1<<0xd;
for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
}
} else {
mask=~(0x10001<<0xd); // Lead byte 0xED.
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]&=mask;
}

Просмотреть файл

@ -11,6 +11,7 @@
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "uassert.h"
@ -120,4 +121,41 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
return TRUE;
}
// Binds this sink to the CharString that `dest` points at. The pointer is
// dereferenced immediately and only a reference is kept, so `dest` must not
// be null.
// NOTE(review): presumably the CharString has to outlive the sink — confirm
// against the callers.
CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
}
// Defaulted destructor, defined out of line.
CharStringByteSink::~CharStringByteSink() = default;
void
CharStringByteSink::Append(const char* bytes, int32_t n) {
UErrorCode status = U_ZERO_ERROR;
dest_.append(bytes, n, status);
// Any errors are silently ignored.
}
char*
CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
                                    int32_t desired_capacity_hint,
                                    char* scratch,
                                    int32_t scratch_capacity,
                                    int32_t* result_capacity) {
    // The request is only acceptable when at least one byte is asked for and
    // the caller's scratch buffer could serve as a fallback of that size.
    const bool requestIsValid =
        min_capacity >= 1 && scratch_capacity >= min_capacity;
    if (!requestIsValid) {
        *result_capacity = 0;
        return nullptr;
    }

    // Ask the wrapped CharString for a buffer to append into.
    UErrorCode errorCode = U_ZERO_ERROR;
    char* appendBuffer = dest_.getAppendBuffer(
        min_capacity, desired_capacity_hint, *result_capacity, errorCode);

    // If the CharString could not provide one, hand back the caller's scratch
    // buffer and report its capacity instead.
    if (!U_SUCCESS(errorCode)) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    return appendBuffer;
}
U_NAMESPACE_END

Просмотреть файл

@ -13,6 +13,7 @@
U_NAMESPACE_BEGIN
class ByteSink;
class CharString;
class Edits;
class U_COMMON_API ByteSinkUtil {
@ -58,4 +59,25 @@ private:
ByteSink &sink, uint32_t options, Edits *edits);
};
// ByteSink implementation that forwards appended bytes to a CharString.
// The sink keeps only a reference to the CharString (see dest_).
class CharStringByteSink : public ByteSink {
public:
// Wraps *dest; the pointer is dereferenced by the constructor.
CharStringByteSink(CharString* dest);
~CharStringByteSink() override;
// A sink without a destination is not allowed, and sinks are not copyable.
CharStringByteSink() = delete;
CharStringByteSink(const CharStringByteSink&) = delete;
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
// Appends n bytes to the wrapped CharString.
void Append(const char* bytes, int32_t n) override;
// Returns a buffer to write into and stores its capacity in *result_capacity.
// The parameters follow the ByteSink::GetAppendBuffer override signature.
char* GetAppendBuffer(int32_t min_capacity,
int32_t desired_capacity_hint,
char* scratch,
int32_t scratch_capacity,
int32_t* result_capacity) override;
private:
// Destination string; held by reference, not owned by this class.
CharString& dest_;
};
U_NAMESPACE_END

Просмотреть файл

@ -339,7 +339,8 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(bytes) {
hash=hash*37+ustr_hashCharsN(bytes, len);
hash=static_cast<int32_t>(
static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
}
UBool

Просмотреть файл

@ -0,0 +1,336 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// characterproperties.cpp
// created: 2018sep03 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/uchar.h"
#include "unicode/ucpmap.h"
#include "unicode/ucptrie.h"
#include "unicode/umutablecptrie.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
#include "unicode/uset.h"
#include "cmemory.h"
#include "mutex.h"
#include "normalizer2impl.h"
#include "uassert.h"
#include "ubidi_props.h"
#include "ucase.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "uprops.h"
using icu::UInitOnce;
using icu::UnicodeSet;
namespace {
UBool U_CALLCONV characterproperties_cleanup();
// One cache slot per property source: the lazily built inclusions set plus the
// init-once guard that is passed to umtx_initOnce() when the set is requested.
struct Inclusion {
// Inclusions set for this source; nullptr until built, and again after cleanup.
UnicodeSet *fSet;
// Guard that lets the set be built once; reset by the cleanup function.
UInitOnce fInitOnce;
};
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
// Lazily created sets returned by u_getBinaryPropertySet(), indexed by the
// binary property constant itself.
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
// Lazily created maps returned by u_getIntPropertyMap(), indexed by
// (property - UCHAR_INT_START).
UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
// Held while looking up or creating entries of sets[] and maps[].
UMutex cpMutex = U_MUTEX_INITIALIZER;
//----------------------------------------------------------------
// Inclusions list
//----------------------------------------------------------------
// USetAdder implementation
// Does not use uset.h to reduce code dependencies
void U_CALLCONV
_set_add(USet *set, UChar32 c) {
    // USetAdder callback: the opaque USet handle is the UnicodeSet that
    // was stored in the adder, so view it as such and add one code point.
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    target->add(c);
}
void U_CALLCONV
_set_addRange(USet *set, UChar32 start, UChar32 end) {
    // USetAdder callback: adds the code point range start..end to the
    // UnicodeSet hidden behind the opaque USet handle.
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    target->add(start, end);
}
void U_CALLCONV
_set_addString(USet *set, const UChar *str, int32_t length) {
    // USetAdder callback: adds a string to the UnicodeSet behind the handle.
    // The first constructor argument is true exactly when length is negative;
    // the string is built directly from str without an intermediate copy here.
    const UBool lengthIsNegative = static_cast<UBool>(length < 0);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    target->add(icu::UnicodeString(lengthIsNegative, str, length));
}
// Library cleanup hook: releases every cached inclusions set, binary property
// set and integer property map, and re-arms the init-once guards so that the
// caches can be rebuilt afterwards. Always returns TRUE.
UBool U_CALLCONV characterproperties_cleanup() {
    for (int32_t src = 0; src < UPRV_LENGTHOF(gInclusions); ++src) {
        Inclusion &entry = gInclusions[src];
        delete entry.fSet;
        entry.fSet = nullptr;
        entry.fInitOnce.reset();
    }
    for (UnicodeSet *&cachedSet : sets) {
        delete cachedSet;
        cachedSet = nullptr;
    }
    for (UCPMap *&cachedMap : maps) {
        // The maps are stored as UCPMap but were built as UCPTrie objects.
        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
        cachedMap = nullptr;
    }
    return TRUE;
}
} // namespace
U_NAMESPACE_BEGIN
/*
Capacity requested for each inclusions set before any property starts are added.
Reduce excessive reallocation, and make it easier to detect initialization problems.
Usually you don't see smaller sets than this for Unicode 5.0.
*/
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;
/**
 * Builds the inclusions set for one property source and stores it in
 * gInclusions[src].fSet.
 *
 * This function is invoked only via umtx_initOnce().
 * This function is a friend of class UnicodeSet.
 *
 * @param src       property source whose "property starts" are collected
 * @param errorCode set to U_INTERNAL_PROGRAM_ERROR for UPROPS_SRC_NONE or an
 *                  unhandled source, to U_MEMORY_ALLOCATION_ERROR when the set
 *                  cannot be allocated, or to whatever the data providers
 *                  report; on any failure the slot is left as nullptr
 */
void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }

    UnicodeSet *&slot = gInclusions[src].fSet;
    U_ASSERT(slot == nullptr);
    slot = new UnicodeSet();
    if (slot == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Callback table through which the data providers fill the new set.
    USetAdder adder = {
        reinterpret_cast<USet *>(slot),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr, // don't need remove()
        nullptr // don't need removeRange()
    };

    slot->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);

    switch (src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&adder, &errorCode);
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC:
    case UPROPS_SRC_NFKC:
    case UPROPS_SRC_NFKC_CF: {
        // Same procedure for all three; only the normalizer data differs.
        const Normalizer2Impl *norm;
        if (src == UPROPS_SRC_NFC) {
            norm = Normalizer2Factory::getNFCImpl(errorCode);
        } else if (src == UPROPS_SRC_NFKC) {
            norm = Normalizer2Factory::getNFKCImpl(errorCode);
        } else {
            norm = Normalizer2Factory::getNFKC_CFImpl(errorCode);
        }
        if (U_SUCCESS(errorCode)) {
            norm->addPropertyStarts(&adder, errorCode);
        }
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addCanonIterPropertyStarts(&adder, errorCode);
        }
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts(src, &adder, &errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_SUCCESS(errorCode)) {
        // Compact for caching
        slot->compact();
        ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
        return;
    }

    // Something failed: do not leave a partially filled set in the cache.
    delete slot;
    slot = nullptr;
}
/**
 * Returns the cached inclusions set for a property source, building it on the
 * first request via umtx_initOnce().
 *
 * @param src       property source; anything outside [0, UPROPS_SRC_COUNT)
 *                  yields U_ILLEGAL_ARGUMENT_ERROR
 * @param errorCode in/out error code; nothing is done if it already indicates failure
 * @return the cached set (the slot's current value), or nullptr when the
 *         function returns early because of an error
 */
const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    if (!(0 <= src && src < UPROPS_SRC_COUNT)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &entry = gInclusions[src];
    umtx_initOnce(entry.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
    return entry.fSet;
}
/**
 * Returns the inclusions set for the data source that backs the given property.
 *
 * @param prop      property whose source is looked up with uprops_getSource()
 * @param errorCode in/out error code; nullptr is returned if it already indicates failure
 * @return whatever getInclusionsForSource() returns for that source
 */
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    return getInclusionsForSource(uprops_getSource(prop), errorCode);
}
U_NAMESPACE_END
namespace {
/**
 * Creates a frozen UnicodeSet with every code point for which
 * u_hasBinaryProperty(c, property) is true.
 *
 * Only the code points inside the property's inclusions set are tested; a run
 * of "true" that is still open when one inclusions range ends is carried over
 * into the next range, and a run open at the very end extends to U+10FFFF.
 *
 * @param property  binary property to evaluate
 * @param errorCode in/out error code
 * @return a new set owned by the caller, or nullptr on failure
 */
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    icu::LocalPointer<UnicodeSet> result(new UnicodeSet());
    if (result.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    const int32_t rangeCount = inclusions->getRangeCount();
    UChar32 runStart = -1;  // first code point of the open "true" run, or -1
    for (int32_t r = 0; r < rangeCount; ++r) {
        const UChar32 last = inclusions->getRangeEnd(r);
        UChar32 c = inclusions->getRangeStart(r);
        while (c <= last) {
            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
            const UBool hasProperty = u_hasBinaryProperty(c, property);
            if (hasProperty) {
                if (runStart < 0) {
                    // false -> true: a new run begins here.
                    runStart = c;
                }
            } else if (runStart >= 0) {
                // true -> false: the run ended just before c.
                result->add(runStart, c - 1);
                runStart = -1;
            }
            ++c;
        }
    }
    if (runStart >= 0) {
        result->add(runStart, 0x10FFFF);
    }
    result->freeze();
    return result.orphan();
}
/**
 * Builds an immutable code point map holding u_getIntPropertyValue(c, property)
 * for an integer-valued property.
 *
 * A mutable trie is opened with nullValue as both of its default values
 * (USCRIPT_UNKNOWN for UCHAR_SCRIPT, 0 otherwise). Only code points inside the
 * property's inclusions set are evaluated; each run of equal values that
 * differs from nullValue is written as one range, so runs equal to nullValue
 * rely on the trie's default.
 *
 * @param property  integer property to evaluate
 * @param errorCode in/out error code; also collects failures from the trie calls
 * @return the result of umutablecptrie_buildImmutable() viewed as a UCPMap,
 *         or nullptr if an error was detected before the build step
 */
UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    // Covers both a failed trie allocation and a failed inclusions lookup.
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;          // first code point of the current run
    uint32_t value = nullValue; // value of the current run

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                // The run [start, c - 1] is complete; store it unless it is the default.
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    // Flush the last run. It must be compared with nullValue, exactly like the
    // runs inside the loop: when nullValue is nonzero (UCHAR_SCRIPT), a final
    // run with value 0 differs from the trie default and has to be stored.
    if (value != nullValue) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    // These two properties get the FAST trie type; all others use SMALL.
    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }

    // Pick the narrowest value width that can hold the property's maximum value.
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}
} // namespace
U_NAMESPACE_USE
/**
 * Returns the cached set for a binary property, creating it with makeSet()
 * on first use while holding cpMutex.
 *
 * @param property   binary property; values outside [0, UCHAR_BINARY_LIMIT)
 *                   yield U_ILLEGAL_ARGUMENT_ERROR
 * @param pErrorCode in/out error code
 * @return the cached set as a USet, or nullptr on failure
 */
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if (!(0 <= property && property < UCHAR_BINARY_LIMIT)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UnicodeSet *&slot = sets[property];
    if (slot == nullptr) {
        slot = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    return slot->toUSet();
}
/**
 * Returns the cached map for an integer property, creating it with makeMap()
 * on first use while holding cpMutex.
 *
 * @param property   integer property; values outside
 *                   [UCHAR_INT_START, UCHAR_INT_LIMIT) yield U_ILLEGAL_ARGUMENT_ERROR
 * @param pErrorCode in/out error code
 * @return the cache slot's value after the optional makeMap() call, or
 *         nullptr when the function returns early because of an error
 */
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if (!(UCHAR_INT_START <= property && property < UCHAR_INT_LIMIT)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UCPMap *&slot = maps[property - UCHAR_INT_START];
    if (slot == nullptr) {
        slot = makeMap(property, *pErrorCode);
    }
    return slot;
}

Просмотреть файл

@ -79,7 +79,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this;
}
if(sLength<0) {
sLength=uprv_strlen(s);
sLength= static_cast<int32_t>(uprv_strlen(s));
}
if(sLength>0) {
if(s==(buffer.getAlias()+len)) {
@ -126,15 +126,21 @@ char *CharString::getAppendBuffer(int32_t minCapacity,
}
CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
}
CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return *this;
}
if (!uprv_isInvariantUnicodeString(s)) {
if (!uprv_isInvariantUString(uchars, ucharsLen)) {
errorCode = U_INVARIANT_CONVERSION_ERROR;
return *this;
}
if(ensureCapacity(len+s.length()+1, 0, errorCode)) {
len+=s.extract(0, 0x7fffffff, buffer.getAlias()+len, buffer.getCapacity()-len, US_INV);
if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen);
len += ucharsLen;
buffer[len] = 0;
}
return *this;
}

Просмотреть файл

@ -123,6 +123,7 @@ public:
UErrorCode &errorCode);
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);
/**
* Appends a filename/path part, e.g., a directory name.

Просмотреть файл

@ -172,7 +172,7 @@ public:
* @return *this
*/
LocalMemory<T> &moveFrom(LocalMemory<T> &src) U_NOEXCEPT {
delete[] LocalPointerBase<T>::ptr;
uprv_free(LocalPointerBase<T>::ptr);
LocalPointerBase<T>::ptr=src.ptr;
src.ptr=NULL;
return *this;
@ -279,6 +279,10 @@ inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t le
*
* Unlike LocalMemory and LocalArray, this class never adopts
* (takes ownership of) another array.
*
* WARNING: MaybeStackArray only works with primitive (plain-old data) types.
* It does NOT know how to call a destructor! If you work with classes with
* destructors, consider LocalArray in localpointer.h.
*/
template<typename T, int32_t stackCapacity>
class MaybeStackArray {

Просмотреть файл

@ -2,7 +2,8 @@
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- The following import will include the 'default' configuration options for VS projects. -->
<Import Project="..\allinone\Build.Windows.ProjectConfiguration.props" />
<!-- The following import will include the library configuration options for VS projects. -->
<Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
<PropertyGroup Label="Globals">
<ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid>
</PropertyGroup>
@ -85,7 +86,7 @@
<ProgramDataBaseFileName>.\x86\Release/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin\icuuc62.dll</OutputFile>
<OutputFile>..\..\bin\icuuc63.dll</OutputFile>
<AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile>
<DataExecutionPrevention>
@ -105,7 +106,7 @@
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
</ClCompile>
<Link>
<OutputFile>..\..\bin\icuuc62d.dll</OutputFile>
<OutputFile>..\..\bin\icuuc63d.dll</OutputFile>
<AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile>
<DataExecutionPrevention>
@ -124,7 +125,7 @@
<ProgramDataBaseFileName>.\x64\Release/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64\icuuc62.dll</OutputFile>
<OutputFile>..\..\bin64\icuuc63.dll</OutputFile>
<AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64\icuuc.lib</ImportLibrary>
@ -142,7 +143,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<OutputFile>..\..\bin64\icuuc62d.dll</OutputFile>
<OutputFile>..\..\bin64\icuuc63d.dll</OutputFile>
<AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64\icuucd.lib</ImportLibrary>
@ -182,6 +183,7 @@
<ClCompile Include="ustack.cpp" />
<ClCompile Include="ustrenum.cpp" />
<ClCompile Include="utrie.cpp" />
<ClCompile Include="utrie_swap.cpp" />
<ClCompile Include="utrie2.cpp" />
<ClCompile Include="utrie2_builder.cpp" />
<ClCompile Include="uvector.cpp" />
@ -268,6 +270,7 @@
<ClCompile Include="ruleiter.cpp" />
<ClCompile Include="ucase.cpp" />
<ClCompile Include="uchar.cpp" />
<ClCompile Include="characterproperties.cpp" />
<ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp" />
<ClCompile Include="unifilt.cpp" />
@ -315,8 +318,10 @@
<ClCompile Include="ucharstriebuilder.cpp" />
<ClCompile Include="ucharstrieiterator.cpp" />
<ClCompile Include="uchriter.cpp" />
<ClCompile Include="ucptrie.cpp" />
<ClCompile Include="uinvchar.cpp" />
<ClCompile Include="uiter.cpp" />
<ClCompile Include="umutablecptrie.cpp" />
<ClCompile Include="unistr.cpp" />
<ClCompile Include="unistr_case.cpp" />
<ClCompile Include="unistr_case_locale.cpp" />
@ -332,8 +337,6 @@
<ClCompile Include="ustrtrns.cpp" />
<ClCompile Include="utext.cpp" />
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="static_unicode_sets.cpp" />
<ClInclude Include="localsvc.h" />
<ClInclude Include="msvcres.h" />

Просмотреть файл

@ -139,6 +139,9 @@
<ClCompile Include="utrie.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="utrie_swap.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="utrie2.cpp">
<Filter>collections</Filter>
</ClCompile>
@ -385,6 +388,9 @@
<ClCompile Include="bmpset.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="characterproperties.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="propname.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
@ -562,12 +568,6 @@
<ClCompile Include="bytestriebuilder.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="listformatter.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="ulistformatter.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="messagepattern.cpp">
<Filter>formatting</Filter>
</ClCompile>
@ -589,6 +589,12 @@
<ClCompile Include="ucharstrieiterator.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="ucptrie.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="umutablecptrie.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="patternprops.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
@ -1186,12 +1192,6 @@
<CustomBuild Include="unicode\messagepattern.h">
<Filter>formatting</Filter>
</CustomBuild>
<CustomBuild Include="unicode\listformatter.h">
<Filter>formatting</Filter>
</CustomBuild>
<CustomBuild Include="unicode\ulistformatter.h">
<Filter>formatting</Filter>
</CustomBuild>
<CustomBuild Include="unicode\appendable.h">
<Filter>strings</Filter>
</CustomBuild>
@ -1204,6 +1204,12 @@
<CustomBuild Include="unicode\ucharstriebuilder.h">
<Filter>collections</Filter>
</CustomBuild>
<CustomBuild Include="unicode\ucptrie.h">
<Filter>collections</Filter>
</CustomBuild>
<CustomBuild Include="unicode\umutablecptrie.h">
<Filter>collections</Filter>
</CustomBuild>
<CustomBuild Include="unicode\enumset.h">
<Filter>data &amp; memory</Filter>
</CustomBuild>
@ -1217,4 +1223,4 @@
<Filter>strings</Filter>
</CustomBuild>
</ItemGroup>
</Project>
</Project>

Просмотреть файл

@ -1,6 +1,9 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- The following import will include the UWP configuration options for VS projects. -->
<Import Project="..\allinone\Build.Windows.UWP.ProjectConfiguration.props" />
<!-- The following import will include the library configuration options for VS projects. -->
<Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
@ -184,7 +187,7 @@
<ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuuc62.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc63.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
</Link>
@ -200,7 +203,7 @@
<ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuuc62d.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc63d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
</Link>
@ -216,7 +219,7 @@
<ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuuc62.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc63.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
</Link>
@ -232,7 +235,7 @@
<ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuuc62d.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc63d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
</Link>
@ -248,7 +251,7 @@
<ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuuc62.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc63.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
</Link>
@ -264,7 +267,7 @@
<ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuuc62d.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc63d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
</Link>
@ -304,6 +307,7 @@
<ClCompile Include="ustack.cpp" />
<ClCompile Include="ustrenum.cpp" />
<ClCompile Include="utrie.cpp" />
<ClCompile Include="utrie_swap.cpp" />
<ClCompile Include="utrie2.cpp" />
<ClCompile Include="utrie2_builder.cpp" />
<ClCompile Include="uvector.cpp" />
@ -319,9 +323,7 @@
<ClCompile Include="umutex.cpp" />
<ClCompile Include="utrace.cpp" />
<ClCompile Include="utypes.cpp" />
<ClCompile Include="wintz.cpp">
<ExcludedFromBuild>true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="wintz.cpp" />
<ClCompile Include="ucnv.cpp" />
<ClCompile Include="ucnv2022.cpp" />
<ClCompile Include="ucnv_bld.cpp" />
@ -394,6 +396,7 @@
<ClCompile Include="ruleiter.cpp" />
<ClCompile Include="ucase.cpp" />
<ClCompile Include="uchar.cpp" />
<ClCompile Include="characterproperties.cpp" />
<ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp" />
<ClCompile Include="unifilt.cpp" />
@ -439,9 +442,11 @@
<ClCompile Include="ucharstrie.cpp" />
<ClCompile Include="ucharstriebuilder.cpp" />
<ClCompile Include="ucharstrieiterator.cpp" />
<ClCompile Include="ucptrie.cpp" />
<ClCompile Include="uchriter.cpp" />
<ClCompile Include="uinvchar.cpp" />
<ClCompile Include="uiter.cpp" />
<ClCompile Include="umutablecptrie.cpp" />
<ClCompile Include="unistr.cpp" />
<ClCompile Include="unistr_case.cpp" />
<ClCompile Include="unistr_case_locale.cpp" />
@ -457,8 +462,6 @@
<ClCompile Include="ustrtrns.cpp" />
<ClCompile Include="utext.cpp" />
<ClCompile Include="utf_impl.cpp" />
<ClCompile Include="listformatter.cpp" />
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="static_unicode_sets.cpp" />
</ItemGroup>
<ItemGroup>

Просмотреть файл

@ -325,9 +325,9 @@ foundBest:
// two characters after uc were not 0x0E4C THANTHAKHAT before
// checking the dictionary. That is just a performance filter,
// but it's not clear it's faster than checking the trie.
int32_t candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
utext_setNativeIndex(text, current + cuWordLength + chars);
if (candidates > 0) {
if (num_candidates > 0) {
break;
}
}
@ -555,9 +555,9 @@ foundBest:
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
// TODO: this looks iffy; compare with old code.
int32_t candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
utext_setNativeIndex(text, current + cuWordLength + chars);
if (candidates > 0) {
if (num_candidates > 0) {
break;
}
}
@ -748,9 +748,9 @@ foundBest:
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
// TODO: this looks iffy; compare with old code.
int32_t candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
utext_setNativeIndex(text, current + cuWordLength + chars);
if (candidates > 0) {
if (num_candidates > 0) {
break;
}
}
@ -953,9 +953,9 @@ foundBest:
uc = utext_current32(text);
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
int32_t candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
utext_setNativeIndex(text, current+cuWordLength+chars);
if (candidates > 0) {
if (num_candidates > 0) {
break;
}
}

Просмотреть файл

@ -276,7 +276,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
// ab deletions meet bc insertions at the same intermediate-string index.
// Some users expect the bc insertions to come first, so we fetch from bc first.
if (bc_bLength == 0) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
bc_bLength = bcIter.oldLength();
cLength = bcIter.newLength();
if (bc_bLength == 0) {
@ -293,7 +293,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
// else see if the other iterator is done, too.
}
if (ab_bLength == 0) {
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
aLength = abIter.oldLength();
ab_bLength = abIter.newLength();
if (ab_bLength == 0) {

Просмотреть файл

@ -18,6 +18,7 @@
#include "unicode/udata.h"
#include "unicode/localpointer.h"
#include "unicode/normalizer2.h"
#include "unicode/ucptrie.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cstring.h"
@ -42,12 +43,12 @@ private:
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
UDataMemory *memory;
UTrie2 *ownedTrie;
UCPTrie *ownedTrie;
};
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
udata_close(memory);
utrie2_close(ownedTrie);
ucptrie_close(ownedTrie);
}
UBool U_CALLCONV
@ -62,7 +63,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==3
pInfo->formatVersion[0]==4
) {
// Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
@ -91,9 +92,9 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
inBytes+offset, nextOffset-offset, NULL,
&errorCode);
ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
inBytes+offset, nextOffset-offset, NULL,
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
@ -131,15 +132,26 @@ U_CDECL_BEGIN
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
U_CDECL_END
static Norm2AllModes *nfkcSingleton;
static Norm2AllModes *nfkc_cfSingleton;
static UHashtable *cache=NULL;
#if !NORM2_HARDCODE_NFC_DATA
static Norm2AllModes *nfcSingleton;
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
#endif
static Norm2AllModes *nfkcSingleton;
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
static Norm2AllModes *nfkc_cfSingleton;
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
static UHashtable *cache=NULL;
// UInitOnce singleton initialization function
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
#if !NORM2_HARDCODE_NFC_DATA
if (uprv_strcmp(what, "nfc") == 0) {
nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
} else
#endif
if (uprv_strcmp(what, "nfkc") == 0) {
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
@ -157,19 +169,36 @@ static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
}
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
#if !NORM2_HARDCODE_NFC_DATA
delete nfcSingleton;
nfcSingleton = NULL;
nfcInitOnce.reset();
#endif
delete nfkcSingleton;
nfkcSingleton = NULL;
nfkcInitOnce.reset();
delete nfkc_cfSingleton;
nfkc_cfSingleton = NULL;
nfkc_cfInitOnce.reset();
uhash_close(cache);
cache=NULL;
nfkcInitOnce.reset();
nfkc_cfInitOnce.reset();
return TRUE;
}
U_CDECL_END
#if !NORM2_HARDCODE_NFC_DATA
// Returns the NFC Norm2AllModes singleton, running initSingletons("nfc")
// through umtx_initOnce() on the first call. Returns NULL without doing
// anything if errorCode already indicates a failure.
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
        return nfcSingleton;
    }
    return NULL;
}
#endif
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
@ -184,6 +213,36 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
return nfkc_cfSingleton;
}
#if !NORM2_HARDCODE_NFC_DATA
// Returns the "comp" member of the NFC Norm2AllModes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->comp;
}
// Returns the "decomp" member of the NFC Norm2AllModes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->decomp;
}
// Returns the "fcd" member of the NFC Norm2AllModes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcd;
}
// Returns the "fcc" member of the NFC Norm2AllModes singleton,
// or NULL if that singleton is not available.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return &nfcModes->fcc;
}
// Returns the "impl" pointer held by the NFC Norm2AllModes singleton,
// or NULL if that singleton is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfcModes=Norm2AllModes::getNFCInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return nfcModes->impl;
}
#endif
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
@ -247,7 +306,7 @@ Normalizer2::getInstance(const char *packageName,
}
void *temp=uhash_get(cache, name);
if(temp==NULL) {
int32_t keyLength=uprv_strlen(name)+1;
int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
char *nameCopy=(char *)uprv_malloc(keyLength);
if(nameCopy==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;

Просмотреть файл

@ -45,9 +45,9 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
}
va_start(args, buflen);
while ((str = va_arg(args, char *))) {
while ((str = va_arg(args, char *)) != 0) {
char c;
while (p != e && (c = *str++)) {
while (p != e && (c = *str++) != 0) {
*p++ = c;
}
}
@ -98,7 +98,7 @@ ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
: path(NULL), locale(Locale::getRoot())
{
if (path) {
int32_t len = uprv_strlen(path);
int32_t len = static_cast<int32_t>(uprv_strlen(path));
this->path = (const char*) uprv_malloc(len + 1);
if (this->path) {
uprv_strcpy((char *)this->path, path);
@ -560,21 +560,21 @@ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
}
UnicodeString&
LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
UnicodeString& result) const {
if (locale.isBogus()) {
if (loc.isBogus()) {
result.setToBogus();
return result;
}
UnicodeString resultName;
const char* lang = locale.getLanguage();
const char* lang = loc.getLanguage();
if (uprv_strlen(lang) == 0) {
lang = "root";
}
const char* script = locale.getScript();
const char* country = locale.getCountry();
const char* variant = locale.getVariant();
const char* script = loc.getScript();
const char* country = loc.getCountry();
const char* variant = loc.getVariant();
UBool hasScript = uprv_strlen(script) > 0;
UBool hasCountry = uprv_strlen(country) > 0;
@ -630,14 +630,14 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
LocalPointer<StringEnumeration> e(locale.createKeywords(status));
LocalPointer<StringEnumeration> e(loc.createKeywords(status));
if (e.isValid() && U_SUCCESS(status)) {
UnicodeString temp2;
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
const char* key;
while ((key = e->next((int32_t *)0, status)) != NULL) {
value[0] = 0;
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
return result;
}

Просмотреть файл

@ -31,9 +31,12 @@
******************************************************************************
*/
#include <utility>
#include "unicode/bytestream.h"
#include "unicode/locid.h"
#include "unicode/strenum.h"
#include "unicode/stringpiece.h"
#include "unicode/uloc.h"
#include "putilimp.h"
#include "mutex.h"
@ -43,9 +46,11 @@
#include "cstring.h"
#include "uassert.h"
#include "uhash.h"
#include "ulocimp.h"
#include "ucln_cmn.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "bytesinkutil.h"
U_CDECL_BEGIN
static UBool U_CALLCONV locale_cleanup(void);
@ -424,49 +429,70 @@ Locale::Locale(const Locale &other)
*this = other;
}
Locale &Locale::operator=(const Locale &other)
{
// Move constructor. Points fullName at the inline fullNameBuffer and
// initializes baseName from fullName, then delegates to the move assignment
// operator, which frees fullName/baseName only when they differ from those
// defaults.
// NOTE(review): baseName(fullName) relies on fullName being declared before
// baseName in the class (member initialization order) -- confirm in locid.h.
Locale::Locale(Locale&& other) U_NOEXCEPT
: UObject(other), fullName(fullNameBuffer), baseName(fullName) {
*this = std::move(other);
}
Locale& Locale::operator=(const Locale& other) {
if (this == &other) {
return *this;
}
/* Free our current storage */
if (baseName != fullName) {
uprv_free(baseName);
}
baseName = NULL;
if(fullName != fullNameBuffer) {
uprv_free(fullName);
fullName = fullNameBuffer;
setToBogus();
if (other.fullName == other.fullNameBuffer) {
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
} else if (other.fullName == nullptr) {
fullName = nullptr;
} else {
fullName = uprv_strdup(other.fullName);
if (fullName == nullptr) return *this;
}
/* Allocate the full name if necessary */
if(other.fullName != other.fullNameBuffer) {
fullName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(other.fullName)+1));
if (fullName == NULL) {
return *this;
}
}
/* Copy the full name */
uprv_strcpy(fullName, other.fullName);
/* Copy the baseName if it differs from fullName. */
if (other.baseName == other.fullName) {
baseName = fullName;
} else {
if (other.baseName) {
baseName = uprv_strdup(other.baseName);
}
} else if (other.baseName != nullptr) {
baseName = uprv_strdup(other.baseName);
if (baseName == nullptr) return *this;
}
/* Copy the language and country fields */
uprv_strcpy(language, other.language);
uprv_strcpy(script, other.script);
uprv_strcpy(country, other.country);
/* The variantBegin is an offset, just copy it */
variantBegin = other.variantBegin;
fIsBogus = other.fIsBogus;
return *this;
}
// Move assignment: releases any heap storage this locale owns, then takes
// over other's name storage. A fullName/baseName that lives outside other's
// inline fullNameBuffer is adopted by pointer; a name held in fullNameBuffer
// is copied, because that buffer is part of other itself. The fixed-size
// language/script/country fields, variantBegin and fIsBogus are copied.
// On return other.fullName and other.baseName point at other's own
// fullNameBuffer, so other no longer refers to the transferred storage.
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
    // Self-move must be a no-op, matching the guard in copy assignment.
    // Without it the frees below would release storage that is then read
    // back through `other`, and uprv_strcpy() would copy a buffer onto itself.
    if (this == &other) {
        return *this;
    }

    // Drop whatever this object currently owns on the heap.
    if (baseName != fullName) uprv_free(baseName);
    if (fullName != fullNameBuffer) uprv_free(fullName);

    if (other.fullName == other.fullNameBuffer) {
        // Inline storage cannot be stolen; copy the characters instead.
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
        fullName = fullNameBuffer;
    } else {
        fullName = other.fullName;
    }

    // Preserve the "baseName aliases fullName" relationship if other had it;
    // otherwise adopt other's separate baseName pointer.
    if (other.baseName == other.fullName) {
        baseName = fullName;
    } else {
        baseName = other.baseName;
    }

    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);

    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    // Detach other from the storage that now belongs to *this.
    other.baseName = other.fullName = other.fullNameBuffer;

    return *this;
}
@ -545,7 +571,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
/* after uloc_getName/canonicalize() we know that only '_' are separators */
separator = field[0] = fullName;
fieldIdx = 1;
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) && fieldIdx < UPRV_LENGTHOF(field)-1) {
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 && fieldIdx < UPRV_LENGTHOF(field)-1) {
field[fieldIdx] = separator + 1;
fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
fieldIdx++;
@ -652,7 +678,7 @@ Locale::initBaseName(UErrorCode &status) {
int32_t
Locale::hashCode() const
{
return ustr_hashCharsN(fullName, uprv_strlen(fullName));
return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
}
void
@ -704,6 +730,276 @@ Locale::setDefault( const Locale& newLocale,
locale_set_default_internal(localeID, status);
}
// Replaces this locale, in place, with the result of running
// uloc_addLikelySubtags() on its current full name. Does nothing if status
// already indicates failure. If the re-initialized locale ends up bogus,
// status is set to U_ILLEGAL_ARGUMENT_ERROR.
void
Locale::addLikelySubtags(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // The maximized ID is usually longer than the input, but by an amount
    // that cannot be estimated up front; start at the current length and let
    // CharString grow as needed.
    CharString expanded;
    int32_t capacity = static_cast<int32_t>(uprv_strlen(fullName));
    char* out = nullptr;
    int32_t length = 0;

    bool overflowed;
    do {
        // capacity is both the request and, on return, the granted size.
        out = expanded.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return;
        }

        length = uloc_addLikelySubtags(fullName, out, capacity, &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // Retry with the size reported by the failed attempt.
            capacity = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return;
    }

    expanded.append(out, length, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return;
    }

    init(expanded.data(), /*canonicalize=*/FALSE);
    if (isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
// Replaces this locale, in place, with the result of running
// uloc_minimizeSubtags() on its current full name. Does nothing if status
// already indicates failure. If the re-initialized locale ends up bogus,
// status is set to U_ILLEGAL_ARGUMENT_ERROR.
void
Locale::minimizeSubtags(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Apart from a few edge cases (such as the empty string, which minimizes
    // to "en__POSIX"), a minimized locale ID is never longer than its input,
    // so the current length is a good starting capacity.
    CharString reduced;
    int32_t capacity = static_cast<int32_t>(uprv_strlen(fullName));
    char* out = nullptr;
    int32_t length = 0;

    bool overflowed;
    do {
        // capacity is both the request and, on return, the granted size.
        out = reduced.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return;
        }

        length = uloc_minimizeSubtags(fullName, out, capacity, &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // Because of CharString's internal minimum buffer size, no input
            // is known that actually reaches this retry; an assertion might
            // be a better fit here.
            capacity = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return;
    }

    reduced.append(out, length, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return;
    }

    init(reduced.data(), /*canonicalize=*/FALSE);
    if (isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
}
// Builds a Locale from a BCP-47 language tag.
// Returns a bogus Locale if status already indicates failure or if any step
// fails. status is set to U_ILLEGAL_ARGUMENT_ERROR when the parser did not
// consume the whole tag or when the resulting locale is bogus.
Locale U_EXPORT2
Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
{
    Locale result(Locale::eBOGUS);

    if (U_FAILURE(status)) {
        return result;
    }

    // Why not simply use the ordinary Locale constructor? That constructor
    // only falls back to uloc_forLanguageTag() when it can tell the input is
    // BCP-47. That works for tags carrying BCP-47 extensions, but not for
    // grandfathered tags (eg. "en-GB-oed") that can also be read as ICU
    // locale IDs and therefore never trigger BCP-47 parsing. So the tag is
    // parsed explicitly here and the result is handed to Locale::init().

    // Simple tags are exactly as long as their ICU locale ID counterparts
    // ("en-US" vs. "en_US"), so the tag length is the initial capacity.
    CharString localeID;
    int32_t capacity = tag.size();
    char* out = nullptr;
    int32_t consumed = 0;
    int32_t length = 0;

    bool overflowed;
    do {
        // capacity is both the request and, on return, the granted size.
        out = localeID.getAppendBuffer(
                /*minCapacity=*/capacity,
                /*desiredCapacityHint=*/capacity,
                capacity,
                status);
        if (U_FAILURE(status)) {
            return result;
        }

        length = ulocimp_forLanguageTag(
                tag.data(),
                tag.length(),
                out,
                capacity,
                &consumed,
                &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // Tags with extensions map to longer ICU locale IDs, but
            // uloc_forLanguageTag() reports the exact length needed, so this
            // reallocation happens at most once.
            capacity = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return result;
    }

    // Anything left unparsed means the tag was not fully valid.
    if (consumed != tag.size()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    localeID.append(out, length, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return result;
    }

    result.init(localeID.data(), /*canonicalize=*/FALSE);
    if (result.isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return result;
}
// Writes this locale as a BCP-47 language tag to sink, using
// uloc_toLanguageTag() in non-strict mode. Does nothing if status already
// indicates failure; a bogus locale yields U_ILLEGAL_ARGUMENT_ERROR and an
// allocation failure yields U_MEMORY_ALLOCATION_ERROR.
void
Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
{
    if (U_FAILURE(status)) {
        return;
    }
    if (fIsBogus) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Simple locale IDs are exactly as long as their BCP-47 form ("en_US"
    // vs. "en-US"), so the ID length is the first guess. An empty ID still
    // needs room for "und".
    LocalMemory<char> scratch;
    int32_t wanted = static_cast<int32_t>(uprv_strlen(fullName));
    if (wanted == 0) {
        wanted = 3;  // "und"
    }

    char* out = nullptr;
    int32_t available = 0;
    int32_t length = 0;

    bool overflowed;
    do {
        if (scratch.allocateInsteadAndReset(wanted) == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        out = sink.GetAppendBuffer(
                /*min_capacity=*/wanted,
                /*desired_capacity_hint=*/wanted,
                scratch.getAlias(),
                wanted,
                &available);

        length = uloc_toLanguageTag(
                fullName,
                out,
                available,
                /*strict=*/FALSE,
                &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // In a few edge cases the BCP-47 form is longer than the ICU
            // locale ID. Most notoriously "C" expands to
            // "en-US-u-va-posix", 16 times longer, and several calls to
            // uloc_toLanguageTag() are needed to discover that.
            // https://unicode-org.atlassian.net/browse/ICU-20132
            wanted = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return;
    }

    sink.Append(out, length);

    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators not used.
    }
}
Locale U_EXPORT2
Locale::createFromName (const char *name)
{
@ -1010,20 +1306,84 @@ KeywordEnumeration::~KeywordEnumeration() {
uprv_free(keywords);
}
// KeywordEnumeration variant whose next() passes each keyword through
// uloc_toUnicodeLocaleKey() before handing it to the caller.
class UnicodeKeywordEnumeration : public KeywordEnumeration {
public:
    using KeywordEnumeration::KeywordEnumeration;
    virtual ~UnicodeKeywordEnumeration();

    // Returns the converted form of the next keyword, or nullptr when the
    // base enumeration returns nullptr, status indicates failure, or the
    // keyword has no conversion (status is then set to
    // U_ILLEGAL_ARGUMENT_ERROR). If resultLength is non-null it receives the
    // length of the returned key, or 0 when nullptr is returned.
    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
        const char* legacyKey = KeywordEnumeration::next(nullptr, status);

        const char* unicodeKey = nullptr;
        if (U_SUCCESS(status) && legacyKey != nullptr) {
            unicodeKey = uloc_toUnicodeLocaleKey(legacyKey);
            if (unicodeKey == nullptr) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
            }
        }

        if (resultLength != nullptr) {
            *resultLength = (unicodeKey == nullptr)
                    ? 0
                    : static_cast<int32_t>(uprv_strlen(unicodeKey));
        }
        return unicodeKey;
    }
};

// Defined out of line so the virtual destructor serves as the class's
// "key function".
UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
StringEnumeration *
Locale::createKeywords(UErrorCode &status) const
{
char keywords[256];
int32_t keywordCapacity = 256;
int32_t keywordCapacity = sizeof keywords;
StringEnumeration *result = NULL;
if (U_FAILURE(status)) {
return result;
}
const char* variantStart = uprv_strchr(fullName, '@');
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
if(keyLen) {
if(U_SUCCESS(status) && keyLen) {
result = new KeywordEnumeration(keywords, keyLen, 0, status);
if (!result) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
} else {
status = U_INVALID_FORMAT_ERROR;
}
}
return result;
}
StringEnumeration *
Locale::createUnicodeKeywords(UErrorCode &status) const
{
char keywords[256];
int32_t keywordCapacity = sizeof keywords;
StringEnumeration *result = NULL;
if (U_FAILURE(status)) {
return result;
}
const char* variantStart = uprv_strchr(fullName, '@');
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
if(U_SUCCESS(status) && keyLen) {
result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
if (!result) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
} else {
status = U_INVALID_FORMAT_ERROR;
@ -1038,6 +1398,105 @@ Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, U
return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
}
// Appends the value of the given keyword to sink, as returned by
// uloc_getKeywordValue(). Does nothing if status already indicates failure;
// a bogus locale yields U_ILLEGAL_ARGUMENT_ERROR and an allocation failure
// yields U_MEMORY_ALLOCATION_ERROR.
void
Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return;
    }
    if (fIsBogus) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString terminatedName(keywordName, status);
    if (U_FAILURE(status)) {
        return;
    }

    LocalMemory<char> scratch;
    int32_t wanted = 16;  // Arbitrarily chosen default size.
    char* out = nullptr;
    int32_t available = 0;
    int32_t length = 0;

    bool overflowed;
    do {
        if (scratch.allocateInsteadAndReset(wanted) == nullptr) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        out = sink.GetAppendBuffer(
                /*min_capacity=*/wanted,
                /*desired_capacity_hint=*/wanted,
                scratch.getAlias(),
                wanted,
                &available);

        length = uloc_getKeywordValue(
                fullName,
                terminatedName.data(),
                out,
                available,
                &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // Retry with the size reported by the failed attempt.
            wanted = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return;
    }

    sink.Append(out, length);

    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators not used.
    }
}
// Appends the Unicode-style value of a Unicode-style keyword to sink.
// The key is mapped with uloc_toLegacyKey(), the value is fetched through
// getKeywordValue(), and the result is mapped back with
// uloc_toUnicodeLocaleType(). status is set to U_ILLEGAL_ARGUMENT_ERROR if
// either mapping returns nullptr.
void
Locale::getUnicodeKeywordValue(StringPiece keywordName,
                               ByteSink& sink,
                               UErrorCode& status) const {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString unicodeKey(keywordName, status);
    if (U_FAILURE(status)) {
        return;
    }

    const char* legacyKey = uloc_toLegacyKey(unicodeKey.data());
    if (legacyKey == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    CharString legacyValue;
    {
        // Scoped so the temporary sink is destroyed before legacyValue is
        // read (presumably to make sure everything has been written through).
        CharStringByteSink legacySink(&legacyValue);
        getKeywordValue(legacyKey, legacySink, status);
    }
    if (U_FAILURE(status)) {
        return;
    }

    const char* unicodeValue = uloc_toUnicodeLocaleType(
            unicodeKey.data(), legacyValue.data());
    if (unicodeValue == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    sink.Append(unicodeValue, static_cast<int32_t>(uprv_strlen(unicodeValue)));
}
void
Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
{
@ -1048,6 +1507,46 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
}
}
// StringPiece convenience overload: copies both arguments into
// NUL-terminated buffers and forwards them to the const char* overload.
void
Locale::setKeywordValue(StringPiece keywordName,
                        StringPiece keywordValue,
                        UErrorCode& status) {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString terminatedName(keywordName, status);
    const CharString terminatedValue(keywordValue, status);

    const char* name = terminatedName.data();
    const char* value = terminatedValue.data();
    setKeywordValue(name, value, status);
}
// Sets a keyword given in Unicode-style form. The key is mapped with
// uloc_toLegacyKey() and the value with uloc_toLegacyType(), then both are
// passed to setKeywordValue(). status is set to U_ILLEGAL_ARGUMENT_ERROR if
// either mapping returns nullptr.
void
Locale::setUnicodeKeywordValue(StringPiece keywordName,
                               StringPiece keywordValue,
                               UErrorCode& status) {
    // TODO: Remove the need for a const char* to a NUL terminated buffer.
    const CharString unicodeKey(keywordName, status);
    const CharString unicodeValue(keywordValue, status);
    if (U_FAILURE(status)) {
        return;
    }

    const char* legacyKey = uloc_toLegacyKey(unicodeKey.data());
    if (legacyKey == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Only look up the value once the key is known to be mappable.
    const char* legacyValue =
            uloc_toLegacyType(unicodeKey.data(), unicodeValue.data());
    if (legacyValue == nullptr) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    setKeywordValue(legacyKey, legacyValue, status);
}
const char *
Locale::getBaseName() const {
return baseName;

Просмотреть файл

@ -34,9 +34,9 @@ U_NAMESPACE_BEGIN
// private mutex where possible.
// For example:
//
// UMutex myMutex;
//
//
// UMutex myMutex = U_MUTEX_INITIALIZER;
//
// void Function(int arg1, int arg2)
// {
// static Object* foo; // Shared read-write object

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -34,9 +34,11 @@
using icu::Normalizer2Impl;
#if NORM2_HARDCODE_NFC_DATA
// NFC/NFD data machine-generated by gennorm2 --csource
#define INCLUDED_FROM_NORMALIZER2_CPP
#include "norm2_nfc_data.h"
#endif
U_NAMESPACE_BEGIN
@ -176,6 +178,36 @@ FCDNormalizer2::~FCDNormalizer2() {}
// instance cache ---------------------------------------------------------- ***
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
static Normalizer2 *noopSingleton;
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
// Init-once callback: creates the shared NoopNormalizer2 and, on success,
// registers uprv_normalizer2_cleanup. Sets U_MEMORY_ALLOCATION_ERROR if the
// allocation returns NULL; does nothing if errorCode already indicates
// failure.
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        noopSingleton=new NoopNormalizer2;
        if(noopSingleton!=NULL) {
            ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
        } else {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
// Returns the shared no-op normalizer, creating it on first use through
// umtx_initOnce(). Returns NULL if errorCode already indicates failure.
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
        return noopSingleton;
    }
    return NULL;
}
// Returns the address of the impl member of norm2, treating norm2 as a
// Normalizer2WithImpl. The cast is unchecked, so the caller must pass an
// object that really is a Normalizer2WithImpl.
const Normalizer2Impl *
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
return &((Normalizer2WithImpl *)norm2)->impl;
}
// Destructor: deletes the impl object held by this Norm2AllModes.
Norm2AllModes::~Norm2AllModes() {
delete impl;
}
@ -195,6 +227,7 @@ Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
return allModes;
}
#if NORM2_HARDCODE_NFC_DATA
Norm2AllModes *
Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
@ -210,48 +243,15 @@ Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
return createInstance(impl, errorCode);
}
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
static Norm2AllModes *nfcSingleton;
static Normalizer2 *noopSingleton;
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
// UInitOnce singleton initialization functions
static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
}
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
noopSingleton=new NoopNormalizer2;
if(noopSingleton==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
}
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
delete nfcSingleton;
nfcSingleton = NULL;
delete noopSingleton;
noopSingleton = NULL;
nfcInitOnce.reset();
noopInitOnce.reset();
return TRUE;
}
U_CDECL_END
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
@ -281,23 +281,29 @@ const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
return allModes!=NULL ? &allModes->fcc : NULL;
}
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return NULL; }
umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
return noopSingleton;
}
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
return allModes!=NULL ? allModes->impl : NULL;
}
#endif // NORM2_HARDCODE_NFC_DATA
const Normalizer2Impl *
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
return &((Normalizer2WithImpl *)norm2)->impl;
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
delete noopSingleton;
noopSingleton = NULL;
noopInitOnce.reset();
#if NORM2_HARDCODE_NFC_DATA
delete nfcSingleton;
nfcSingleton = NULL;
nfcInitOnce.reset();
#endif
return TRUE;
}
U_CDECL_END
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***

Просмотреть файл

@ -16,6 +16,8 @@
* created by: Markus W. Scherer
*/
// #define UCPTRIE_DEBUG
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
@ -24,7 +26,9 @@
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/ucptrie.h"
#include "unicode/udata.h"
#include "unicode/umutablecptrie.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "unicode/utf8.h"
@ -34,8 +38,8 @@
#include "normalizer2impl.h"
#include "putilimp.h"
#include "uassert.h"
#include "ucptrie_impl.h"
#include "uset_imp.h"
#include "utrie2.h"
#include "uvector.h"
U_NAMESPACE_BEGIN
@ -62,7 +66,7 @@ inline uint8_t leadByteForCP(UChar32 c) {
* Returns the code point from one single well-formed UTF-8 byte sequence
* between cpStart and cpLimit.
*
* UTrie2 UTF-8 macros do not assemble whole code points (for efficiency).
* Trie UTF-8 macros do not assemble whole code points (for efficiency).
* When we do need the code point, we call this function.
* We should not need it for normalization-inert data (norm16==0).
* Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
@ -122,7 +126,7 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
}
} else if (src[1] == 0x87) {
uint8_t t = src[2];
if ((int8_t)t <= (int8_t)0x82) {
if ((int8_t)t <= (int8_t)0x82u) {
return t - (0xa7 - 0x40);
}
}
@ -253,7 +257,7 @@ UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &e
return TRUE;
}
UBool ReorderingBuffer::append(const UChar *s, int32_t length,
UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
uint8_t leadCC, uint8_t trailCC,
UErrorCode &errorCode) {
if(length==0) {
@ -280,8 +284,11 @@ UBool ReorderingBuffer::append(const UChar *s, int32_t length,
while(i<length) {
U16_NEXT(s, i, length, c);
if(i<length) {
// s must be in NFD, otherwise we need to use getCC().
leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
if (isNFD) {
leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
} else {
leadCC = impl.getCC(impl.getNorm16(c));
}
} else {
leadCC=trailCC;
}
@ -411,7 +418,8 @@ struct CanonIterData : public UMemory {
CanonIterData(UErrorCode &errorCode);
~CanonIterData();
void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
UTrie2 *trie;
UMutableCPTrie *mutableTrie;
UCPTrie *trie;
UVector canonStartSets; // contains UnicodeSet *
};
@ -420,22 +428,22 @@ Normalizer2Impl::~Normalizer2Impl() {
}
void
Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
minLcccCP=inIndexes[IX_MIN_LCCC_CP];
minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]);
minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
minYesNo=inIndexes[IX_MIN_YES_NO];
minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
minNoNo=inIndexes[IX_MIN_NO_NO];
minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
limitNoNo=inIndexes[IX_LIMIT_NO_NO];
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
U_ASSERT((minMaybeYes&7)==0); // 8-aligned for noNoDelta bit fields
centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields
centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
normTrie=inTrie;
@ -445,75 +453,8 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
smallFCD=inSmallFCD;
}
class LcccContext {
public:
LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
norm16 != Normalizer2Impl::JAMO_VT) {
set.add(start, end);
} else if (impl.minNoNoCompNoMaybeCC <= norm16 && norm16 < impl.limitNoNo) {
uint16_t fcd16=impl.getFCD16(start);
if(fcd16>0xff) { set.add(start, end); }
}
}
private:
const Normalizer2Impl &impl;
UnicodeSet &set;
};
namespace {
struct PropertyStartsContext {
PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
: impl(ni), sa(adder) {}
const Normalizer2Impl &impl;
const USetAdder *sa;
};
} // namespace
U_CDECL_BEGIN
static UBool U_CALLCONV
enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
return TRUE;
}
static UBool U_CALLCONV
enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
/* add the start code point to the USet */
const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;
const USetAdder *sa=ctx->sa;
sa->add(sa->set, start);
if (start != end && ctx->impl.isAlgorithmicNoNo((uint16_t)value) &&
(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
// Range of code points with same-norm16-value algorithmic decompositions.
// They might have different non-zero FCD16 values.
uint16_t prevFCD16=ctx->impl.getFCD16(start);
while(++start<=end) {
uint16_t fcd16=ctx->impl.getFCD16(start);
if(fcd16!=prevFCD16) {
sa->add(sa->set, start);
prevFCD16=fcd16;
}
}
}
return TRUE;
}
static UBool U_CALLCONV
enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
/* add the start code point to the USet */
const USetAdder *sa=(const USetAdder *)context;
sa->add(sa->set, start);
return TRUE;
}
static uint32_t U_CALLCONV
segmentStarterMapper(const void * /*context*/, uint32_t value) {
return value&CANON_NOT_SEGMENT_STARTER;
@ -523,15 +464,44 @@ U_CDECL_END
void
Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
LcccContext context(*this, set);
utrie2_enum(normTrie, NULL, enumLcccRange, &context);
UChar32 start = 0, end;
uint32_t norm16;
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
nullptr, nullptr, &norm16)) >= 0) {
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
norm16 != Normalizer2Impl::JAMO_VT) {
set.add(start, end);
} else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
uint16_t fcd16 = getFCD16(start);
if (fcd16 > 0xff) { set.add(start, end); }
}
start = end + 1;
}
}
void
Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
/* add the start code point of each same-value range of each trie */
PropertyStartsContext context(*this, sa);
utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
// Add the start code point of each same-value range of the trie.
UChar32 start = 0, end;
uint32_t value;
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
nullptr, nullptr, &value)) >= 0) {
sa->add(sa->set, start);
if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
// Range of code points with same-norm16-value algorithmic decompositions.
// They might have different non-zero FCD16 values.
uint16_t prevFCD16 = getFCD16(start);
while (++start <= end) {
uint16_t fcd16 = getFCD16(start);
if (fcd16 != prevFCD16) {
sa->add(sa->set, start);
prevFCD16 = fcd16;
}
}
}
start = end + 1;
}
/* add Hangul LV syllables and LV+1 because of skippables */
for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
@ -543,10 +513,15 @@ Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode
void
Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
/* add the start code point of each same-value range of the canonical iterator data trie */
if(ensureCanonIterData(errorCode)) {
// currently only used for the SEGMENT_STARTER property
utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
// Add the start code point of each same-value range of the canonical iterator data trie.
if (!ensureCanonIterData(errorCode)) { return; }
// Currently only used for the SEGMENT_STARTER property.
UChar32 start = 0, end;
uint32_t value;
while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
segmentStarterMapper, nullptr, &value)) >= 0) {
sa->add(sa->set, start);
start = end + 1;
}
}
@ -633,27 +608,23 @@ Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
// count code units below the minimum or with irrelevant data for the quick check
for(prevSrc=src; src!=limit;) {
if( (c=*src)<minNoCP ||
isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
) {
++src;
} else if(!U16_IS_SURROGATE(c)) {
} else if(!U16_IS_LEAD(c)) {
break;
} else {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
if(isMostDecompYesAndZeroCC(norm16)) {
src+=2;
} else {
break;
}
} else /* trail surrogate */ {
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
--src;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
}
if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
src+=U16_LENGTH(c);
} else {
break;
++src; // unpaired lead surrogate: inert
}
}
}
@ -713,7 +684,7 @@ Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
const UChar *prevSrc = src;
UChar32 c;
uint16_t norm16;
UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
return prevSrc;
}
@ -737,7 +708,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
}
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
norm16=getRawNorm16(c);
}
if (norm16 < minYesNo) {
// c does not decompose
@ -758,7 +729,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
} else {
leadCC=0;
}
return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
}
const uint8_t *
@ -771,7 +742,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
while (src < limit) {
const uint8_t *prevSrc = src;
uint16_t norm16;
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
// Get the decomposition and the lead and trail cc's.
UChar32 c = U_SENTINEL;
if (norm16 >= limitNoNo) {
@ -789,7 +760,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
}
c = codePointFromValidUTF8(prevSrc, src);
c = mapAlgorithmic(c, norm16);
norm16 = getNorm16(c);
norm16 = getRawNorm16(c);
} else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
return prevSrc;
}
@ -828,7 +799,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
} else {
leadCC = 0;
}
if (!buffer.append((const char16_t *)mapping+1, length, leadCC, trailCC, errorCode)) {
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
return nullptr;
}
}
@ -854,7 +825,7 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c
length=0;
U16_APPEND_UNSAFE(buffer, length, c);
// The mapping might decompose further.
norm16 = getNorm16(c);
norm16 = getRawNorm16(c);
}
if (norm16 < minYesNo) {
return decomp;
@ -926,19 +897,30 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
return;
}
// Just merge the strings at the boundary.
ForwardUTrie2StringIterator iter(normTrie, src, limit);
uint8_t firstCC, prevCC, cc;
firstCC=prevCC=cc=getCC(iter.next16());
while(cc!=0) {
prevCC=cc;
cc=getCC(iter.next16());
};
bool isFirst = true;
uint8_t firstCC = 0, prevCC = 0, cc;
const UChar *p = src;
while (p != limit) {
const UChar *codePointStart = p;
UChar32 c;
uint16_t norm16;
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
if ((cc = getCC(norm16)) == 0) {
p = codePointStart;
break;
}
if (isFirst) {
firstCC = cc;
isFirst = false;
}
prevCC = cc;
}
if(limit==NULL) { // appendZeroCC() needs limit!=NULL
limit=u_strchr(iter.codePointStart, 0);
limit=u_strchr(p, 0);
}
if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) {
buffer.appendZeroCC(p, limit, errorCode);
}
}
@ -1085,7 +1067,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const
}
UChar32 composite=compositeAndFwd>>1;
if((compositeAndFwd&1)!=0) {
addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
}
set.add(composite);
} while((firstUnit&COMP_1_LAST_TUPLE)==0);
@ -1124,7 +1106,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
prevCC=0;
for(;;) {
UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
cc=getCCFromYesOrMaybe(norm16);
if( // this character combines backward and
isMaybe(norm16) &&
@ -1229,7 +1211,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
// Is the composite a starter that combines forward?
if(compositeAndFwd&1) {
compositionsList=
getCompositionsListForComposite(getNorm16(composite));
getCompositionsListForComposite(getRawNorm16(composite));
} else {
compositionsList=NULL;
}
@ -1268,7 +1250,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
UChar32
Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0
uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16
const uint16_t *list;
if(isInert(norm16)) {
return U_SENTINEL;
@ -1359,28 +1341,22 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
return TRUE;
}
if( (c=*src)<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
) {
++src;
} else {
prevSrc = src++;
if(!U16_IS_SURROGATE(c)) {
if(!U16_IS_LEAD(c)) {
break;
} else {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
++src;
c=U16_GET_SUPPLEMENTARY(c, c2);
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
++src;
c=U16_GET_SUPPLEMENTARY(c, c2);
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
if(!isCompYesAndZeroCC(norm16)) {
break;
}
} else /* trail surrogate */ {
if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
--prevSrc;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
}
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
break;
}
}
}
@ -1529,7 +1505,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
}
uint8_t prevCC = cc;
nextSrc = src;
UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, n16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
if (n16 >= MIN_YES_YES_WITH_CC) {
cc = getCCFromNormalYesOrMaybe(n16);
if (prevCC > cc) {
@ -1559,7 +1535,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
// decompose and recompose.
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
const UChar *p = prevSrc;
UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, norm16);
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);
if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
prevSrc = p;
}
@ -1626,28 +1602,22 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
return src;
}
if( (c=*src)<minNoMaybeCP ||
isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
) {
++src;
} else {
prevSrc = src++;
if(!U16_IS_SURROGATE(c)) {
if(!U16_IS_LEAD(c)) {
break;
} else {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
++src;
c=U16_GET_SUPPLEMENTARY(c, c2);
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
++src;
c=U16_GET_SUPPLEMENTARY(c, c2);
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
if(!isCompYesAndZeroCC(norm16)) {
break;
}
} else /* trail surrogate */ {
if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
--prevSrc;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
}
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
break;
}
}
}
@ -1665,7 +1635,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
} else {
const UChar *p = prevSrc;
uint16_t n16;
UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, n16);
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);
if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
prevBoundary = prevSrc;
} else {
@ -1699,7 +1669,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
}
uint8_t prevCC = cc;
nextSrc = src;
UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, norm16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
if (isMaybeOrNonZeroCC(norm16)) {
cc = getCCFromYesOrMaybe(norm16);
if (!(prevCC <= cc || cc == 0)) {
@ -1786,7 +1756,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
++src;
} else {
prevSrc = src;
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
if (!isCompYesAndZeroCC(norm16)) {
break;
}
@ -1945,7 +1915,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
}
uint8_t prevCC = cc;
nextSrc = src;
UTRIE2_U8_NEXT16(normTrie, nextSrc, limit, n16);
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
if (n16 >= MIN_YES_YES_WITH_CC) {
cc = getCCFromNormalYesOrMaybe(n16);
if (prevCC > cc) {
@ -1975,7 +1945,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
// decompose and recompose.
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
const uint8_t *p = prevSrc;
UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16);
UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);
if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
prevSrc = p;
}
@ -2023,7 +1993,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limi
}
UChar32 c;
uint16_t norm16;
UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
return norm16HasCompBoundaryBefore(norm16);
}
@ -2032,7 +2002,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *
return TRUE;
}
uint16_t norm16;
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
return norm16HasCompBoundaryBefore(norm16);
}
@ -2043,7 +2013,7 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p,
}
UChar32 c;
uint16_t norm16;
UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
}
@ -2053,36 +2023,42 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t
return TRUE;
}
uint16_t norm16;
UTRIE2_U8_PREV16(normTrie, start, p, norm16);
UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);
return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
}
const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p,
UBool onlyContiguous) const {
BackwardUTrie2StringIterator iter(normTrie, start, p);
for(;;) {
uint16_t norm16=iter.previous16();
while (p != start) {
const UChar *codePointLimit = p;
UChar32 c;
uint16_t norm16;
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
return iter.codePointLimit;
return codePointLimit;
}
if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
return iter.codePointStart;
if (hasCompBoundaryBefore(c, norm16)) {
return p;
}
}
return p;
}
const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit,
UBool onlyContiguous) const {
ForwardUTrie2StringIterator iter(normTrie, p, limit);
for(;;) {
uint16_t norm16=iter.next16();
if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
return iter.codePointStart;
while (p != limit) {
const UChar *codePointStart = p;
UChar32 c;
uint16_t norm16;
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
if (hasCompBoundaryBefore(c, norm16)) {
return codePointStart;
}
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
return iter.codePointLimit;
return p;
}
}
return p;
}
uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const {
@ -2130,7 +2106,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
}
// Maps to an isCompYesAndZeroCC.
c=mapAlgorithmic(c, norm16);
norm16=getNorm16(c);
norm16=getRawNorm16(c);
}
}
if(norm16<=minYesNo || isHangulLVT(norm16)) {
@ -2195,17 +2171,10 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
prevFCD16=0;
++src;
} else {
if(U16_IS_SURROGATE(c)) {
if(U16_IS_LEAD(c)) {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
--src;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
}
}
if((fcd16=getFCD16FromNormData(c))<=0xff) {
@ -2336,7 +2305,7 @@ const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const
const UChar *codePointLimit = p;
UChar32 c;
uint16_t norm16;
UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
return codePointLimit;
}
@ -2352,7 +2321,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
const UChar *codePointStart=p;
UChar32 c;
uint16_t norm16;
UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
return codePointStart;
}
@ -2366,19 +2335,20 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
// CanonicalIterator data -------------------------------------------------- ***
CanonIterData::CanonIterData(UErrorCode &errorCode) :
trie(utrie2_open(0, 0, &errorCode)),
mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
CanonIterData::~CanonIterData() {
utrie2_close(trie);
umutablecptrie_close(mutableTrie);
ucptrie_close(trie);
}
void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
uint32_t canonValue=utrie2_get32(trie, decompLead);
uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
// origin is the first character whose decomposition starts with
// the character for which we are setting the value.
utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode);
} else {
// origin is not the first character, or it is U+0000.
UnicodeSet *set;
@ -2390,7 +2360,7 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
}
UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
utrie2_set32(trie, decompLead, canonValue, &errorCode);
umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
canonStartSets.addElement(set, errorCode);
if(firstOrigin!=0) {
set->add(firstOrigin);
@ -2406,7 +2376,6 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
class InitCanonIterData {
public:
static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
static void handleRange(Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode);
};
U_CDECL_BEGIN
@ -2417,18 +2386,6 @@ initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
InitCanonIterData::doInit(impl, errorCode);
}
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
// context: the Normalizer2Impl
static UBool U_CALLCONV
enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
UErrorCode errorCode = U_ZERO_ERROR;
if (value != Normalizer2Impl::INERT) {
Normalizer2Impl *impl = (Normalizer2Impl *)context;
InitCanonIterData::handleRange(impl, start, end, (uint16_t)value, errorCode);
}
return U_SUCCESS(errorCode);
}
U_CDECL_END
void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
@ -2438,8 +2395,24 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
if (U_SUCCESS(errorCode)) {
utrie2_enum(impl->normTrie, NULL, enumCIDRangeHandler, impl);
utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
UChar32 start = 0, end;
uint32_t value;
while ((end = ucptrie_getRange(impl->normTrie, start,
UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
nullptr, nullptr, &value)) >= 0) {
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
if (value != Normalizer2Impl::INERT) {
impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
}
start = end + 1;
}
#ifdef UCPTRIE_DEBUG
umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
#endif
impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
umutablecptrie_close(impl->fCanonIterData->mutableTrie);
impl->fCanonIterData->mutableTrie = nullptr;
}
if (U_FAILURE(errorCode)) {
delete impl->fCanonIterData;
@ -2447,11 +2420,6 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
}
}
void InitCanonIterData::handleRange(
Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode) {
impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
}
void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
CanonIterData &newData,
UErrorCode &errorCode) const {
@ -2465,7 +2433,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
return;
}
for(UChar32 c=start; c<=end; ++c) {
uint32_t oldValue=utrie2_get32(newData.trie, c);
uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
uint32_t newValue=oldValue;
if(isMaybeOrNonZeroCC(norm16)) {
// not a segment starter if it occurs in a decomposition or has cc!=0
@ -2483,7 +2451,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
if (isDecompNoAlgorithmic(norm16_2)) {
// Maps to an isCompYesAndZeroCC.
c2 = mapAlgorithmic(c2, norm16_2);
norm16_2 = getNorm16(c2);
norm16_2 = getRawNorm16(c2);
// No compatibility mappings for the CanonicalIterator.
U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
}
@ -2510,10 +2478,10 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
if(norm16_2>=minNoNo) {
while(i<length) {
U16_NEXT_UNSAFE(mapping, i, c2);
uint32_t c2Value=utrie2_get32(newData.trie, c2);
uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
&errorCode);
umutablecptrie_set(newData.mutableTrie, c2,
c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
}
}
}
@ -2524,7 +2492,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
}
}
if(newValue!=oldValue) {
utrie2_set32(newData.trie, c, newValue, &errorCode);
umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
}
}
}
@ -2537,7 +2505,7 @@ UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
}
int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
return (int32_t)utrie2_get32(fCanonIterData->trie, c);
return (int32_t)ucptrie_get(fCanonIterData->trie, c);
}
const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
@ -2561,7 +2529,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
set.add(value);
}
if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
uint16_t norm16=getNorm16(c);
uint16_t norm16=getRawNorm16(c);
if(norm16==JAMO_L) {
UChar32 syllable=
(UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
@ -2608,7 +2576,7 @@ unorm2_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
(1<=formatVersion0 && formatVersion0<=3)
(1<=formatVersion0 && formatVersion0<=4)
)) {
udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
@ -2669,9 +2637,9 @@ unorm2_swap(const UDataSwapper *ds,
ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
offset=nextOffset;
/* swap the UTrie2 */
/* swap the trie */
nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
offset=nextOffset;
/* swap the uint16_t extraData[] */

Просмотреть файл

@ -24,12 +24,20 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/ucptrie.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
#include "mutex.h"
#include "udataswp.h"
#include "uset_imp.h"
#include "utrie2.h"
// When the nfc.nrm data is *not* hardcoded into the common library
// (with this constant set to 0),
// then it needs to be built into the data package:
// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT
#define NORM2_HARDCODE_NFC_DATA 1
U_NAMESPACE_BEGIN
@ -118,7 +126,7 @@ public:
buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
} else {
buffer[0]=orig-c2; // LV syllable
buffer[0]=(UChar)(orig-c2); // LV syllable
buffer[1]=(UChar)(JAMO_T_BASE+c2);
}
}
@ -158,8 +166,7 @@ public:
appendBMP((UChar)c, cc, errorCode) :
appendSupplementary(c, cc, errorCode);
}
// s must be in NFD, otherwise change the implementation.
UBool append(const UChar *s, int32_t length,
UBool append(const UChar *s, int32_t length, UBool isNFD,
uint8_t leadCC, uint8_t trailCC,
UErrorCode &errorCode);
UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
@ -243,7 +250,7 @@ public:
}
virtual ~Normalizer2Impl();
void init(const int32_t *inIndexes, const UTrie2 *inTrie,
void init(const int32_t *inIndexes, const UCPTrie *inTrie,
const uint16_t *inExtraData, const uint8_t *inSmallFCD);
void addLcccChars(UnicodeSet &set) const;
@ -254,7 +261,12 @@ public:
UBool ensureCanonIterData(UErrorCode &errorCode) const;
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
// The trie stores values for lead surrogate code *units*.
// Surrogate code *points* are inert.
uint16_t getNorm16(UChar32 c) const {
return U_IS_LEAD(c) ? INERT : UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
}
// Returns the trie value for c as stored, without the lead-surrogate check
// that getNorm16() applies: for a lead surrogate this is the value stored
// for the code *unit*, not INERT.
uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
@ -704,7 +716,7 @@ private:
uint16_t centerNoNoDelta;
uint16_t minMaybeYes;
const UTrie2 *normTrie;
const UCPTrie *normTrie;
const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
@ -764,7 +776,7 @@ unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.
* Format version 3.0.
* Format version 4.0.
*
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms
@ -818,7 +830,7 @@ unorm_getFCD16(UChar32 c);
* minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* See the normTrie description below and the design doc for details.
*
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
* UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie
*
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits),
@ -946,6 +958,20 @@ unorm_getFCD16(UChar32 c);
* which is artificially assigned "worst case" values lccc=1 and tccc=255.
*
* - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
*
* Changes from format version 3 to format version 4 (ICU 63) ------------------
*
* Switched from UTrie2 to UCPTrie/CodePointTrie.
*
* The new trie no longer stores different values for surrogate code *units* vs.
* surrogate code *points*.
* Lead surrogates still have values for optimized UTF-16 string processing.
* When looking up code point properties, the code now checks for lead surrogates and
* treats them as inert.
*
* gennorm2 now has to reject mappings for surrogate code points.
* UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
* custom normalization data file.
*/
#endif /* !UCONFIG_NO_NORMALIZATION */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -102,9 +102,8 @@
# define NOMCX
# include <windows.h>
# include "unicode/uloc.h"
#if U_PLATFORM_HAS_WINUWP_API == 0
# include "wintz.h"
#else // U_PLATFORM_HAS_WINUWP_API
#if U_PLATFORM_HAS_WINUWP_API
typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
#include <Windows.Globalization.h>
#include <windows.system.userprofile.h>
@ -1062,53 +1061,13 @@ uprv_tzname_clear_cache()
#endif
}
// With the Universal Windows Platform we can just ask Windows for the name
#if U_PLATFORM_HAS_WINUWP_API
U_CAPI const char* U_EXPORT2
uprv_getWindowsTimeZone()
{
// Get default Windows timezone.
ComPtr<IInspectable> calendar;
HRESULT hr = RoActivateInstance(
HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(),
&calendar);
if (SUCCEEDED(hr))
{
ComPtr<ABI::Windows::Globalization::ITimeZoneOnCalendar> timezone;
hr = calendar.As(&timezone);
if (SUCCEEDED(hr))
{
HString timezoneString;
hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
if (SUCCEEDED(hr))
{
int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
if (asciiId != nullptr)
{
u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length);
return asciiId;
}
}
}
}
// Failed
return nullptr;
}
#endif
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
(void)n; // Avoid unreferenced parameter warning.
const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
#if U_PLATFORM_HAS_WINUWP_API > 0
tzid = uprv_getWindowsTimeZone();
#else
tzid = uprv_detectWindowsTimeZone();
#endif
if (tzid != NULL) {
return tzid;
@ -1366,6 +1325,43 @@ uprv_pathIsAbsolute(const char *path)
# endif
#endif
#if U_PLATFORM_HAS_WINUWP_API != 0
// Helper function to get the ICU Data Directory under the Windows directory location.
//
// Builds "<Windows directory><U_FILE_SEP_CHAR><ICU_DATA_DIR_WINDOWS>" in directoryBuffer,
// where the Windows directory is the one reported by GetSystemWindowsDirectoryW().
//
// @param directoryBuffer  Output buffer; it is written only on the path that returns TRUE.
// @param bufferLength     Capacity of directoryBuffer, in chars.
// @return TRUE if the complete path was written to directoryBuffer.
//         FALSE if ICU_DATA_DIR_WINDOWS is not defined, if the Windows directory could not
//         be retrieved or converted to UTF-8, or if the combined path would not fit.
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
{
#if defined(ICU_DATA_DIR_WINDOWS)
wchar_t windowsPath[MAX_PATH];
char windowsPathUtf8[MAX_PATH];
// A result of 0, or one that does not leave spare room in windowsPath, is rejected below.
UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
// Convert UTF-16 to a UTF-8 string.
UErrorCode status = U_ZERO_ERROR;
int32_t windowsPathUtf8Len = 0;
// The source length of -1 passes windowsPath as a NUL-terminated string.
u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
&windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
// U_SUCCESS() also accepts warnings, so the "not terminated" warning is excluded explicitly.
// The length check keeps room for the separator that may be appended below.
if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
(windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
// Ensure it always has a separator, so we can append the ICU data path.
if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
windowsPathUtf8[windowsPathUtf8Len] = '\0';
}
// Check if the concatenated string will fit.
// NOTE(review): presumably ICU_DATA_DIR_WINDOWS is a string literal, so UPRV_LENGTHOF()
// already counts its terminating NUL -- confirm against the build definition.
if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
uprv_strcpy(directoryBuffer, windowsPathUtf8);
uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
return TRUE;
}
}
}
#endif
// Reached when the feature is compiled out or when any of the checks above failed.
return FALSE;
}
#endif
static void U_CALLCONV dataDirectoryInitFn() {
/* If we already have the directory, then return immediately. Will happen if user called
* u_setDataDirectory().
@ -1425,24 +1421,10 @@ static void U_CALLCONV dataDirectoryInitFn() {
}
#endif
#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
// Use data from the %windir%\globalization\icu directory
// This is only available if ICU is built as a system component
#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS)
char datadir_path_buffer[MAX_PATH];
UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer));
if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1))
{
if (datadir_path_buffer[length - 1] != '\\')
{
datadir_path_buffer[length++] = '\\';
datadir_path_buffer[length] = '\0';
}
if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer))
{
uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS);
path = datadir_path_buffer;
}
if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
path = datadir_path_buffer;
}
#endif
@ -1491,20 +1473,30 @@ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
#if U_PLATFORM_HAS_WINUWP_API == 0
const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
#else
// TODO: UWP does not support alternate timezone data directories at this time
const char *dir = "";
#if U_PLATFORM_HAS_WINUWP_API != 0
// The UWP version does not support the environment variable setting, but can possibly pick up the time zone files from the Windows directory.
char datadir_path_buffer[MAX_PATH];
if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
dir = datadir_path_buffer;
}
#else
dir = getenv("ICU_TIMEZONE_FILES_DIR");
#endif // U_PLATFORM_HAS_WINUWP_API
#if defined(U_TIMEZONE_FILES_DIR)
if (dir == NULL) {
// Build time configuration setting.
dir = TO_STRING(U_TIMEZONE_FILES_DIR);
}
#endif
if (dir == NULL) {
dir = "";
}
setTimeZoneFilesDir(dir, status);
}
@ -1676,7 +1668,8 @@ The leftmost codepage (.xxx) wins.
/* Note that we scan the *uncorrected* ID. */
if ((p = uprv_strrchr(posixID, '@')) != NULL) {
if (correctedPOSIXLocale == NULL) {
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
/* new locale can be 1 char longer than old one if @ -> __ */
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+2));
/* Exit on memory allocation error. */
if (correctedPOSIXLocale == NULL) {
return NULL;
@ -1693,7 +1686,7 @@ The leftmost codepage (.xxx) wins.
}
if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
}
else {
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
@ -1747,70 +1740,22 @@ The leftmost codepage (.xxx) wins.
#elif U_PLATFORM_USES_ONLY_WIN32_API
#define POSIX_LOCALE_CAPACITY 64
UErrorCode status = U_ZERO_ERROR;
char *correctedPOSIXLocale = 0;
char *correctedPOSIXLocale = nullptr;
// If we have already figured this out just use the cached value
if (gCorrectedPOSIXLocale != NULL) {
if (gCorrectedPOSIXLocale != nullptr) {
return gCorrectedPOSIXLocale;
}
// No cached value, need to determine the current value
static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH];
#if U_PLATFORM_HAS_WINUWP_API == 0
// If not a Universal Windows App, we'll need user default language.
// Vista and above should use Locale Names instead of LCIDs
int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale));
#else
// In a UWP app, we want the top language that the application and user agreed upon
ComPtr<ABI::Windows::Foundation::Collections::IVectorView<HSTRING>> languageList;
static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
ComPtr<ABI::Windows::Globalization::IApplicationLanguagesStatics> applicationLanguagesStatics;
HRESULT hr = GetActivationFactory(
HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(),
&applicationLanguagesStatics);
if (SUCCEEDED(hr))
{
hr = applicationLanguagesStatics->get_Languages(&languageList);
}
if (FAILED(hr))
{
// If there is no application context, then use the top language from the user language profile
ComPtr<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics> globalizationPreferencesStatics;
hr = GetActivationFactory(
HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(),
&globalizationPreferencesStatics);
if (SUCCEEDED(hr))
{
hr = globalizationPreferencesStatics->get_Languages(&languageList);
}
}
// We have a list of languages, ICU knows one, so use the top one for our locale
HString topLanguage;
if (SUCCEEDED(hr))
{
hr = languageList->GetAt(0, topLanguage.GetAddressOf());
}
if (FAILED(hr))
{
// Unexpected, use en-US by default
if (gCorrectedPOSIXLocale == NULL) {
gCorrectedPOSIXLocale = "en_US";
}
return gCorrectedPOSIXLocale;
}
// ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale));
#endif
// Now we should have a Windows locale name that needs converted to the POSIX style,
if (length > 0)
// Now we should have a Windows locale name that needs to be converted to the POSIX style.
if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
{
// First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH];
char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
int32_t i;
for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
@ -1858,7 +1803,7 @@ The leftmost codepage (.xxx) wins.
}
// If unable to find a locale we can agree upon, use en-US by default
if (gCorrectedPOSIXLocale == NULL) {
if (gCorrectedPOSIXLocale == nullptr) {
gCorrectedPOSIXLocale = "en_US";
}
return gCorrectedPOSIXLocale;

Просмотреть файл

@ -94,7 +94,7 @@ typedef size_t uintptr_t;
# define U_NL_LANGINFO_CODESET CODESET
#endif
#ifdef U_TZSET
#if defined(U_TZSET) || defined(U_HAVE_TZSET)
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
// UWP doesn't support tzset or environment variables for tz
@ -132,7 +132,7 @@ typedef size_t uintptr_t;
# define U_TIMEZONE timezone
#endif
#ifdef U_TZNAME
#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
/* not usable on all windows platforms */
@ -204,30 +204,18 @@ typedef size_t uintptr_t;
/**
* \def U_HAVE_STD_ATOMICS
* Defines whether the standard C++11 <atomic> is available.
* ICU will use this when available,
* otherwise will fall back to compiler or platform specific alternatives.
* Defines whether to use the standard C++11 <atomic> functions
* If false, ICU will fall back to compiler or platform specific alternatives.
* Note: support for these fall back options for atomics will be removed in a future version
* of ICU, and the use of C++ 11 atomics will be required.
* @internal
*/
#ifdef U_HAVE_STD_ATOMICS
/* Use the predefined value. */
#elif U_CPLUSPLUS_VERSION < 11
/* Not C++11, disable use of atomics */
# define U_HAVE_STD_ATOMICS 0
#elif __clang__ && __clang_major__==3 && __clang_minor__<=1
/* Clang 3.1, has atomic variable initializer bug. */
# define U_HAVE_STD_ATOMICS 0
#else
/* U_HAVE_ATOMIC is typically set by an autoconf test of #include <atomic> */
/* Can be set manually, or left undefined, on platforms without autoconf. */
# if defined(U_HAVE_ATOMIC) && U_HAVE_ATOMIC
# define U_HAVE_STD_ATOMICS 1
# else
# define U_HAVE_STD_ATOMICS 0
# endif
#else
# define U_HAVE_STD_ATOMICS 1
#endif
/**
* \def U_HAVE_CLANG_ATOMICS
* Defines whether Clang c11 style built-in atomics are available.
@ -586,6 +574,49 @@ U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
# endif
#endif
#ifdef __cplusplus
/**
 * Clamp a buffer capacity so that pointer arithmetic on the destination
 * pointer plus the capacity cannot overflow.
 *
 * For a positive capacity the returned value satisfies:
 * - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
 * - (dest + capacity) >= dest
 * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
 *
 * A zero or negative capacity is returned unchanged.
 *
 * @param dest the destination buffer pointer.
 * @param capacity the requested buffer capacity, in units of type T.
 * @return the pinned capacity; never larger than the requested capacity.
 * @internal
 */
template <typename T>
inline int32_t pinCapacity(T *dest, int32_t capacity) {
    if (capacity <= 0) {
        // Nothing to pin for empty or negative capacities.
        return capacity;
    }

    const uintptr_t destAddr = (uintptr_t)dest;
    uintptr_t limitAddr;
# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
    // We have 31-bit pointers.
    limitAddr = 0x7fffffff;
# elif U_PLATFORM == U_PF_OS400
    limitAddr = (uintptr_t)uprv_maximumPtr((void *)dest);
# else
    limitAddr = destAddr + 0x7fffffffu;
    if (limitAddr < destAddr) {
        // The addition wrapped around: fewer than 2GB remain before the end
        // of the address space, so pin to the highest representable address.
        limitAddr = (uintptr_t)-1;
    }
# endif

    // Bytes available from dest up to the limit (max. 2GB), then in units of T.
    const uintptr_t byteBudget = limitAddr - destAddr;
    const int32_t elementBudget = (int32_t)(byteBudget / sizeof(T));
    return elementBudget < capacity ? elementBudget : capacity;
}
#endif // __cplusplus
/* Dynamic Library Functions */
typedef void (UVoidFunction)(void);

Просмотреть файл

@ -18,6 +18,8 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include <cinttypes>
#include "unicode/rbbi.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
@ -628,7 +630,7 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
// or on a trail byte if the input is UTF-8.
utext_setNativeIndex(&fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(&fText);
int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
UErrorCode status = U_ZERO_ERROR;
fBreakCache->preceding(adjustedOffset, status);
@ -655,7 +657,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
// But we still need the side effect of leaving iteration at the following boundary.
utext_setNativeIndex(&fText, offset);
int32_t adjustedOffset = utext_getNativeIndex(&fText);
int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
bool result = false;
UErrorCode status = U_ZERO_ERROR;
@ -848,7 +850,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
#ifdef RBBI_DEBUG
if (gTrace) {
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText));
if (0x20<=c && c<0x7f) {
RBBIDebugPrintf("\"%c\" ", c);
} else {

Просмотреть файл

@ -603,7 +603,7 @@ void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t
fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
}
fBoundaries[nextIdx] = position;
fStatuses[nextIdx] = ruleStatusIdx;
fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
fEndBufIdx = nextIdx;
if (update == UpdateCachePosition) {
// Set current position to the newly added boundary.
@ -631,7 +631,7 @@ bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t
fEndBufIdx = modChunkSize(fEndBufIdx - 1);
}
fBoundaries[nextIdx] = position;
fStatuses[nextIdx] = ruleStatusIdx;
fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
fStartBufIdx = nextIdx;
if (update == UpdateCachePosition) {
fBufIdx = nextIdx;

Просмотреть файл

@ -303,17 +303,24 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
}
void RBBIRuleBuilder::optimizeTables() {
bool didSomething;
do {
didSomething = false;
// Begin looking for duplicates with char class 3.
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
// and should not have other categories merged into them.
IntPair duplPair = {3, 0};
// Begin looking for duplicates with char class 3.
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
// and should not have other categories merged into them.
IntPair duplPair = {3, 0};
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
fSetBuilder->mergeCategories(duplPair);
fForwardTable->removeColumn(duplPair.second);
didSomething = true;
}
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
fSetBuilder->mergeCategories(duplPair);
fForwardTable->removeColumn(duplPair.second);
}
fForwardTable->removeDuplicateStates();
while (fForwardTable->removeDuplicateStates() > 0) {
didSomething = true;
}
} while (didSomething);
}
U_NAMESPACE_END

Просмотреть файл

@ -380,7 +380,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
// with the current rule expression (on the Node Stack)
// with the resulting OR expression going to *destRules
//
RBBINode *thisRule = fNodeStack[fNodeStackPtr];
thisRule = fNodeStack[fNodeStackPtr];
RBBINode *prevRules = *destRules;
RBBINode *orNode = pushNewNode(RBBINode::opOr);
if (U_FAILURE(*fRB->fStatus)) {

Просмотреть файл

@ -428,8 +428,8 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) {
addRuleRootNodes(&ruleRootNodes, tree);
UVector matchStartNodes(*fStatus);
for (int i=0; i<ruleRootNodes.size(); ++i) {
RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(i));
for (int j=0; j<ruleRootNodes.size(); ++j) {
RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
if (node->fChainIn) {
setAdd(&matchStartNodes, node->fFirstPosSet);
}
@ -1082,21 +1082,22 @@ bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
uint16_t table_base;
uint16_t table_dupl;
for (; categories->first < numCols-1; categories->first++) {
for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
if (table_base != table_dupl) {
break;
}
}
if (table_base == table_dupl) {
return true;
}
// Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
uint16_t table_base = 0;
uint16_t table_dupl = 1;
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
if (table_base != table_dupl) {
break;
}
}
if (table_base == table_dupl) {
return true;
}
}
}
return false;
@ -1236,7 +1237,7 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) {
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd->setCharAt(col, newVal);
sd->setCharAt(col, static_cast<char16_t>(newVal));
}
}
}
@ -1245,12 +1246,16 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) {
/*
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t RBBITableBuilder::removeDuplicateStates() {
IntPair dupls = {3, 0};
int32_t numStatesRemoved = 0;
while (findDuplicateState(&dupls)) {
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
removeState(dupls);
++numStatesRemoved;
}
return numStatesRemoved;
}
@ -1411,7 +1416,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
// Note: +2 for the start & stop state.
startState.setCharAt(charClass, charClass+2);
startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
}
// Initially make every other state table row look like the start state row,

Просмотреть файл

@ -15,6 +15,9 @@
#define RBBITBLB_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "rbbirb.h"
@ -66,8 +69,11 @@ public:
*/
void removeColumn(int32_t column);
/** Check for, and remove duplicate states (table rows). */
void removeDuplicateStates();
/**
* Check for, and remove duplicate states (table rows).
* @return the number of states removed.
*/
int32_t removeDuplicateStates();
/** Build the safe reverse table from the already-constructed forward table. */
void buildSafeReverseTable(UErrorCode &status);
@ -204,4 +210,7 @@ private:
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif

Просмотреть файл

@ -702,9 +702,9 @@ ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const
}
// fallback
UErrorCode status = U_ZERO_ERROR;
status = U_ZERO_ERROR;
ICUServiceKey* fallbackKey = createKey(&id, status);
while (fallbackKey->fallback()) {
while (fallbackKey != NULL && fallbackKey->fallback()) {
UnicodeString us;
fallbackKey->currentID(us);
f = (ICUServiceFactory*)map->get(us);

Просмотреть файл

@ -104,7 +104,7 @@ public:
/**
* Deletes this object if it has no references.
* Available for non-cached SharedObjects only. Ownership of cached objects
* is with the UnifiedCache, which is soley responsible for eviction and deletion.
* is with the UnifiedCache, which is solely responsible for eviction and deletion.
*/
void deleteIfZeroRefCount() const;

Просмотреть файл

@ -27,6 +27,7 @@ UnicodeSet* gUnicodeSets[COUNT] = {};
// Save the empty instance in static memory to have well-defined behavior if a
// regular UnicodeSet cannot be allocated.
alignas(UnicodeSet)
char gEmptyUnicodeSet[sizeof(UnicodeSet)];
// Whether the gEmptyUnicodeSet is initialized and ready to use.

Просмотреть файл

@ -373,7 +373,7 @@ StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
return newNode;
}
UBool
int32_t
StringTrieBuilder::hashNode(const void *node) {
return ((const Node *)node)->hashCode();
}

Просмотреть файл

@ -624,7 +624,7 @@ getDirProps(UBiDi *pBiDi) {
pBiDi->paras[pBiDi->paraCount-1].level=1;
}
if(isDefaultLevel) {
pBiDi->paraLevel=pBiDi->paras[0].level;
pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
}
/* The following is needed to resolve the text direction for default level
paragraphs containing no strong character */
@ -825,28 +825,28 @@ bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
N0c1. */
if((direction==0 && pOpening->flags&FOUND_L) ||
(direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
newProp=direction;
(direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
newProp=static_cast<DirProp>(direction);
}
else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
/* it is stable if there is no containing pair or in
conditions too complicated and not worth checking */
stable=(openIdx==pLastIsoRun->start);
if(direction!=pOpening->contextDir)
newProp=pOpening->contextDir; /* N0c1 */
newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */
else
newProp=direction; /* N0c2 */
newProp= static_cast<DirProp>(direction); /* N0c2 */
} else {
/* forget this and any brackets nested within this pair */
pLastIsoRun->limit=openIdx;
return ON; /* N0d */
pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
return ON; /* N0d */
}
bd->pBiDi->dirProps[pOpening->position]=newProp;
bd->pBiDi->dirProps[position]=newProp;
/* Update nested N0c pairs that may be affected */
fixN0c(bd, openIdx, pOpening->position, newProp);
if(stable) {
pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
/* remove lower located synonyms if any */
while(pLastIsoRun->limit>pLastIsoRun->start &&
bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
@ -918,7 +918,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
bracket or it is a case of N0d */
/* Now see if it is an opening bracket */
if(c)
match=u_getBidiPairedBracket(c); /* get the matching char */
match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */
else
match=0;
if(match!=c && /* has a matching char */
@ -948,7 +948,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
pLastIsoRun->contextPos=position;
}
else if(dirProp<=R || dirProp==AL) {
newProp=DIR_FROM_STRONG(dirProp);
newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
pLastIsoRun->lastBase=dirProp;
pLastIsoRun->lastStrong=dirProp;
pLastIsoRun->contextDir=(UBiDiDirection)newProp;
@ -1101,7 +1101,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
else
start=pBiDi->paras[paraIndex-1].limit;
limit=pBiDi->paras[paraIndex].limit;
level=pBiDi->paras[paraIndex].level;
level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
for(i=start; i<limit; i++)
levels[i]=level;
}
@ -1119,7 +1119,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
else
start=pBiDi->paras[paraIndex-1].limit;
limit=pBiDi->paras[paraIndex].limit;
level=pBiDi->paras[paraIndex].level;
level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
for(i=start; i<limit; i++) {
levels[i]=level;
dirProp=dirProps[i];
@ -2827,7 +2827,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
DirProp dirProp;
for(i=0; i<pBiDi->paraCount; i++) {
last=(pBiDi->paras[i].limit)-1;
level=pBiDi->paras[i].level;
level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
if(level==0)
continue; /* LTR paragraph */
start= i==0 ? 0 : pBiDi->paras[i-1].limit;

Просмотреть файл

@ -146,7 +146,7 @@ static UBool
action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
{
ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
pTransform->reorderingOptions, pErrorCode);
static_cast<uint16_t>(pTransform->reorderingOptions), pErrorCode);
*pTransform->pDestLength = pTransform->srcLength;
pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
@ -393,9 +393,9 @@ resolveBaseDirection(const UChar *text, uint32_t length,
switch (*pInLevel) {
case UBIDI_DEFAULT_LTR:
case UBIDI_DEFAULT_RTL: {
UBiDiLevel level = ubidi_getBaseDirection(text, length);
*pInLevel = level != UBIDI_NEUTRAL ? level
: *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR;
UBiDiLevel level = static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length));
*pInLevel = static_cast<UBiDiLevel>(level != UBIDI_NEUTRAL) ? level
: *pInLevel == UBIDI_DEFAULT_RTL ? static_cast<UBiDiLevel>(RTL) : static_cast<UBiDiLevel>(LTR);
break;
}
default:

Просмотреть файл

@ -45,6 +45,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_CURRENCY,
UCLN_COMMON_LOADED_NORMALIZER2,
UCLN_COMMON_NORMALIZER2,
UCLN_COMMON_CHARACTERPROPERTIES,
UCLN_COMMON_USET,
UCLN_COMMON_UNAMES,
UCLN_COMMON_UPROPS,
@ -52,7 +53,6 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_UCNV_IO,
UCLN_COMMON_UDATA,
UCLN_COMMON_PUTIL,
UCLN_COMMON_LIST_FORMATTER,
UCLN_COMMON_UINIT,
/*

Просмотреть файл

@ -1743,13 +1743,9 @@ ucnv_fromUChars(UConverter *cnv,
}
if(srcLength>0) {
srcLimit=src+srcLength;
destCapacity=pinCapacity(dest, destCapacity);
destLimit=dest+destCapacity;
/* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
destLimit=(char *)U_MAX_PTR(dest);
}
/* perform the conversion */
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
destLength=(int32_t)(dest-originalDest);
@ -1803,13 +1799,9 @@ ucnv_toUChars(UConverter *cnv,
}
if(srcLength>0) {
srcLimit=src+srcLength;
destCapacity=pinCapacity(dest, destCapacity);
destLimit=dest+destCapacity;
/* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
destLimit=(UChar *)U_MAX_PTR(dest);
}
/* perform the conversion */
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
destLength=(int32_t)(dest-originalDest);

Просмотреть файл

@ -2772,7 +2772,7 @@ getTrailByte:
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
++mySource;
/* add another bit so that the code below writes 2 bytes in case of error */
mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
mySourceChar = static_cast<UChar>(0x10000 | (mySourceChar << 8) | trailByte);
}
} else {
args->converter->toUBytes[0] = (uint8_t)mySourceChar;
@ -3304,7 +3304,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
args->converter->toUCallbackReason = UCNV_IRREGULAR;
args->converter->toUBytes[0] = mySourceChar;
args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
args->converter->toULength = 1;
args->target = myTarget;
args->source = mySource;

Просмотреть файл

@ -180,7 +180,7 @@ _CompoundTextgetName(const UConverter* cnv);
static int32_t findNextEsc(const char *source, const char *sourceLimit) {
int32_t length = sourceLimit - source;
int32_t length = static_cast<int32_t>(sourceLimit - source);
int32_t i;
for (i = 1; i < length; i++) {
if (*(source + i) == 0x1B) {

Просмотреть файл

@ -71,7 +71,7 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
/* write the BOM if necessary */
if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ (char)0xfe, (char)0xff };
static const char bom[]={ (char)0xfeu, (char)0xffu };
ucnv_fromUWriteBytes(cnv,
bom, 2,
&pArgs->target, pArgs->targetLimit,
@ -672,7 +672,7 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
/* write the BOM if necessary */
if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ (char)0xff, (char)0xfe };
static const char bom[]={ (char)0xffu, (char)0xfeu };
ucnv_fromUWriteBytes(cnv,
bom, 2,
&pArgs->target, pArgs->targetLimit,

Просмотреть файл

@ -228,7 +228,7 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
/* write the BOM if necessary */
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
ucnv_fromUWriteBytes(args->converter,
bom, 4,
&args->target, args->targetLimit,
@ -331,7 +331,7 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
/* write the BOM if necessary */
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
ucnv_fromUWriteBytes(args->converter,
bom, 4,
&args->target, args->targetLimit,
@ -706,7 +706,7 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
/* write the BOM if necessary */
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
ucnv_fromUWriteBytes(args->converter,
bom, 4,
&args->target, args->targetLimit,
@ -817,7 +817,7 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
/* write the BOM if necessary */
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
ucnv_fromUWriteBytes(args->converter,
bom, 4,
&args->target, args->targetLimit,
@ -1043,7 +1043,7 @@ _UTF32Open(UConverter *cnv,
_UTF32Reset(cnv, UCNV_RESET_BOTH);
}
static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 };
static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 };
static void U_CALLCONV
_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
@ -1071,7 +1071,7 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
b=*source;
if(b==0) {
state=1; /* could be 00 00 FE FF */
} else if(b==(char)0xff) {
} else if(b==(char)0xffu) {
state=5; /* could be FF FE 00 00 */
} else {
state=8; /* default to UTF-32BE */

Просмотреть файл

@ -108,7 +108,7 @@ morebytes:
if (mySource < sourceLimit)
{
toUBytes[i] = (char) (ch2 = *mySource);
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{
break; /* i < inBytes */
@ -225,7 +225,7 @@ morebytes:
if (mySource < sourceLimit)
{
toUBytes[i] = (char) (ch2 = *mySource);
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{
break; /* i < inBytes */

Просмотреть файл

@ -199,7 +199,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
args->converter->toUCallbackReason = UCNV_IRREGULAR;
args->converter->toUBytes[0] = UCNV_TILDE;
args->converter->toUBytes[1] = mySourceChar;
args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
args->converter->toULength = 2;
args->target = myTarget;
args->source = mySource;
@ -229,7 +229,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
--mySource;
} else {
/* Include the current byte in the illegal sequence. */
args->converter->toUBytes[1] = mySourceChar;
args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
args->converter->toULength = 2;
}
args->target = myTarget;

Просмотреть файл

@ -4164,8 +4164,8 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
nextSourceIndex=0;
/* Get the SI/SO character for the converter */
siLength = getSISOBytes(SI, cnv->options, siBytes);
soLength = getSISOBytes(SO, cnv->options, soBytes);
siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
/* conversion loop */
/*

Просмотреть файл

@ -41,6 +41,7 @@
#include "propsvec.h"
#include "uassert.h"
#include "ucmndata.h"
#include "udataswp.h"
#include "uenumimp.h"
#include "cmemory.h"
#include "cstring.h"
@ -72,7 +73,7 @@ static void generateSelectorData(UConverterSelector* result,
// set errorValue to all-ones
for (int32_t col = 0; col < columns; col++) {
upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
col, ~0, ~0, status);
col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status);
}
for (int32_t i = 0; i < result->encodingsCount; ++i) {
@ -109,7 +110,7 @@ static void generateSelectorData(UConverterSelector* result,
// this will be reached for the converters that fill the set with
// strings. Those should be ignored by our system
} else {
upvec_setValue(upvec, start_char, end_char, column, ~0, mask,
upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask,
status);
}
}
@ -130,7 +131,7 @@ static void generateSelectorData(UConverterSelector* result,
uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
status);
for (int32_t col = 0; col < columns; col++) {
upvec_setValue(upvec, start_char, end_char, col, ~0, ~0,
upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
status);
}
}
@ -684,7 +685,7 @@ static int16_t countOnes(uint32_t* mask, int32_t len) {
ent &= ent - 1; // clear the least significant bit set
}
}
return totalOnes;
return static_cast<int16_t>(totalOnes);
}

Просмотреть файл

@ -28,81 +28,6 @@
/* swapping ----------------------------------------------------------------- */
/*
 * Data swapping for a serialized folded trie (see utrie.c for details).
 *
 * Reads the UTrieHeader through the swapper, validates it, and computes the
 * total size of header + index + data. When length is negative only the size
 * is computed and nothing is written; otherwise the header, the 16-bit index
 * and the 16- or 32-bit data are swapped from inData into outData.
 *
 * Returns the size in bytes, or 0 with *pErrorCode set on failure.
 */
U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* a non-negative length must at least cover the header */
    if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* read the header fields in the platform's own byte order */
    const UTrieHeader *source=(const UTrieHeader *)inData;
    UTrieHeader header;
    header.signature=ds->readUInt32(source->signature);
    header.options=ds->readUInt32(source->options);
    header.indexLength=udata_readInt32(ds, source->indexLength);
    header.dataLength=udata_readInt32(ds, source->dataLength);

    /* every one of these must hold for the input to be a UTrie */
    if(!( header.signature==0x54726965 &&
          (header.options&UTRIE_OPTIONS_SHIFT_MASK)==UTRIE_SHIFT &&
          ((header.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)==UTRIE_INDEX_SHIFT &&
          header.indexLength>=UTRIE_BMP_INDEX_LENGTH &&
          (header.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))==0 &&
          header.dataLength>=UTRIE_DATA_BLOCK_LENGTH &&
          (header.dataLength&(UTRIE_DATA_GRANULARITY-1))==0 &&
          ((header.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)==0 || header.dataLength>=(UTRIE_DATA_BLOCK_LENGTH+0x100))
    )) {
        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
        return 0;
    }

    const UBool dataIs32=(UBool)((header.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
    const int32_t size=sizeof(UTrieHeader)+header.indexLength*2+header.dataLength*(dataIs32?4:2);

    if(length<0) {
        /* size request only: no output is produced */
        return size;
    }
    if(length<size) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    UTrieHeader *dest=(UTrieHeader *)outData;

    /* swap the header */
    ds->swapArray32(ds, source, sizeof(UTrieHeader), dest, pErrorCode);

    /* swap the index and the data */
    if(!dataIs32) {
        /* index and data are both 16-bit: swap them as one contiguous array */
        ds->swapArray16(ds, source+1, (header.indexLength+header.dataLength)*2, dest+1, pErrorCode);
    } else {
        ds->swapArray16(ds, source+1, header.indexLength*2, dest+1, pErrorCode);
        ds->swapArray32(ds, (const uint16_t *)(source+1)+header.indexLength, header.dataLength*4,
                        (uint16_t *)(dest+1)+header.indexLength, pErrorCode);
    }
    return size;
}
#if !UCONFIG_NO_COLLATION
U_CAPI UBool U_EXPORT2

Просмотреть файл

@ -0,0 +1,590 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucptrie.cpp (modified from utrie2.cpp)
// created: 2017dec29 Markus W. Scherer
// #define UCPTRIE_DEBUG
#ifdef UCPTRIE_DEBUG
# include <stdio.h>
#endif
#include "unicode/utypes.h"
#include "unicode/ucptrie.h"
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "uassert.h"
#include "ucptrie_impl.h"
/**
 * Opens a UCPTrie on top of a serialized trie image.
 *
 * The returned structure is heap-allocated, but its index and data pointers
 * point directly into the caller's `data` buffer (nothing is copied), so the
 * buffer must stay valid for as long as the trie is used.
 *
 * @param type expected trie type; if negative, the type stored in the header is adopted.
 * @param valueWidth expected value width; if negative, the width stored in the header is adopted.
 * @param data serialized trie; must pass the U_POINTER_MASK_LSB(data, 3) == 0 alignment check.
 * @param length number of bytes available at data; must be positive.
 * @param pActualLength if not nullptr, receives the number of bytes the trie occupies.
 * @param pErrorCode in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *        U_INVALID_FORMAT_ERROR if the image is malformed, too short, or does not match
 *        the requested type/valueWidth, and U_MEMORY_ALLOCATION_ERROR if allocation fails.
 * @return the new trie, or nullptr on failure.
 */
U_CAPI UCPTrie * U_EXPORT2
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
                       const void *data, int32_t length, int32_t *pActualLength,
                       UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }

    if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
            type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
            valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    // Enough data for a trie header?
    if (length < (int32_t)sizeof(UCPTrieHeader)) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Check the signature.
    const UCPTrieHeader *header = (const UCPTrieHeader *)data;
    if (header->signature != UCPTRIE_SIG) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Decode and validate the type and value width packed into the options field.
    int32_t options = header->options;
    int32_t typeInt = (options >> 6) & 3;
    int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
    if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
            (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }
    UCPTrieType actualType = (UCPTrieType)typeInt;
    UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
    // A negative request means "accept whatever the data declares".
    if (type < 0) {
        type = actualType;
    }
    if (valueWidth < 0) {
        valueWidth = actualValueWidth;
    }
    if (type != actualType || valueWidth != actualValueWidth) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Get the length values and offsets.
    // They are assembled in a stack copy first so that nothing is allocated
    // until the image has been fully validated.
    UCPTrie tempTrie;
    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
    tempTrie.indexLength = header->indexLength;
    tempTrie.dataLength =
        ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
    tempTrie.index3NullOffset = header->index3NullOffset;
    tempTrie.dataNullOffset =
        ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;

    tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
    tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
    tempTrie.type = type;
    tempTrie.valueWidth = valueWidth;

    // Calculate the actual length: header + 16-bit index + data in its value width.
    int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
        actualLength += tempTrie.dataLength * 2;
    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
        actualLength += tempTrie.dataLength * 4;
    } else {
        actualLength += tempTrie.dataLength;
    }
    if (length < actualLength) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;  // Not enough bytes.
        return nullptr;
    }

    // Allocate the trie.
    UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
    if (trie == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
#ifdef UCPTRIE_DEBUG
    trie->name = "fromSerialized";
#endif

    // Set the pointers to its index and data arrays.
    // The index immediately follows the header; the data follows the index.
    const uint16_t *p16 = (const uint16_t *)(header + 1);
    trie->index = p16;
    p16 += trie->indexLength;

    // Get the data.
    // If the null-value offset lies outside the data array, read the null value
    // from the slot UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET entries before the end instead.
    int32_t nullValueOffset = trie->dataNullOffset;
    if (nullValueOffset >= trie->dataLength) {
        nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
    }
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        trie->data.ptr16 = p16;
        trie->nullValue = trie->data.ptr16[nullValueOffset];
        break;
    case UCPTRIE_VALUE_BITS_32:
        trie->data.ptr32 = (const uint32_t *)p16;
        trie->nullValue = trie->data.ptr32[nullValueOffset];
        break;
    case UCPTRIE_VALUE_BITS_8:
        trie->data.ptr8 = (const uint8_t *)p16;
        trie->nullValue = trie->data.ptr8[nullValueOffset];
        break;
    default:
        // Unreachable because valueWidth was checked above.
        // Still release the allocation so this path cannot leak if it is ever reached.
        uprv_free(trie);
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    if (pActualLength != nullptr) {
        *pActualLength = actualLength;
    }
    return trie;
}
/**
 * Closes a trie by releasing the UCPTrie structure with uprv_free().
 * Only the structure itself is freed here; the index and data arrays it
 * points to are not touched by this function.
 *
 * @param trie the trie to release.
 */
U_CAPI void U_EXPORT2
ucptrie_close(UCPTrie *trie) {
uprv_free(trie);
}
/**
 * Returns the trie's stored type field, cast to UCPTrieType.
 *
 * @param trie the trie to query; must not be null (it is dereferenced unconditionally).
 * @return the value of trie->type.
 */
U_CAPI UCPTrieType U_EXPORT2
ucptrie_getType(const UCPTrie *trie) {
return (UCPTrieType)trie->type;
}
/**
 * Returns the trie's stored value-width field, cast to UCPTrieValueWidth.
 *
 * @param trie the trie to query; must not be null (it is dereferenced unconditionally).
 * @return the value of trie->valueWidth.
 */
U_CAPI UCPTrieValueWidth U_EXPORT2
ucptrie_getValueWidth(const UCPTrie *trie) {
return (UCPTrieValueWidth)trie->valueWidth;
}
/**
 * Computes the data array index for code point c by walking the
 * multi-stage index (index-1 -> index-2 -> index-3 -> data block).
 *
 * The caller is expected to pass a code point below trie->highStart that is
 * not handled by a faster lookup path; this is only checked via U_ASSERT.
 *
 * @param trie the trie.
 * @param c the code point.
 * @return the index into the trie's data array.
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
    // Locate the index-1 entry; its base position depends on the trie type.
    int32_t index1Pos = c >> UCPTRIE_SHIFT_1;
    if (trie->type != UCPTRIE_TYPE_FAST) {
        U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
        index1Pos += UCPTRIE_SMALL_INDEX_LENGTH;
    } else {
        U_ASSERT(0xffff < c && c < trie->highStart);
        index1Pos += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
    }

    // The index-2 entry yields the start of the index-3 block.
    const int32_t index2Pos =
        (int32_t)trie->index[index1Pos] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK);
    const int32_t index3Block = trie->index[index2Pos];
    const int32_t index3Sub = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;

    int32_t dataBlock;
    if ((index3Block & 0x8000) != 0) {
        // 18-bit indexes stored in groups of 9 entries per 8 indexes:
        // the first entry of a group carries the two high bits of each of the
        // following eight 16-bit entries.
        const int32_t groupStart = (index3Block & 0x7fff) + (index3Sub & ~7) + (index3Sub >> 3);
        const int32_t slot = index3Sub & 7;
        const int32_t highBits = ((int32_t)trie->index[groupStart] << (2 + (2 * slot))) & 0x30000;
        dataBlock = highBits | trie->index[groupStart + 1 + slot];
    } else {
        // 16-bit indexes
        dataBlock = trie->index[index3Block + index3Sub];
    }
    return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK);
}
/**
 * Data index lookup for a code point assembled from UTF-8 pieces.
 *
 * The code point is (lt1 << 12) | (t2 << 6) | t3. If it is at or above
 * trie->highStart, the index of the shared high value is returned instead of
 * walking the multi-stage index.
 *
 * @param trie the trie to look up in
 * @param lt1  the upper bits of the code point (everything above bit 11)
 * @param t2   bits 11..6 of the code point
 * @param t3   bits 5..0 of the code point
 * @return the index into the trie's data array
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
    UChar32 c = (lt1 << 12) | (t2 << 6) | t3;
    if (c < trie->highStart) {
        return ucptrie_internalSmallIndex(trie, c);
    }
    // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
    return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
}
/**
 * Backward UTF-8 lookup helper: decodes the code point that ends just before
 * src and returns its data index combined with the number of bytes consumed.
 *
 * At most 7 bytes before src are made visible to the decoder.
 *
 * @param trie  the trie to look up in
 * @param c     the byte already read by the caller, passed on to utf8_prevCharSafeBody()
 * @param start the start of the text
 * @param src   the current position; decoding goes backward from here
 * @return (data index << 3) | number of bytes read backward from src
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
                            const uint8_t *start, const uint8_t *src) {
    // Support 64-bit pointers by avoiding cast of arbitrary difference:
    // only narrow to int32_t once the distance is known to be small.
    int32_t window;
    if ((src - start) > 7) {
        window = 7;
        start = src - 7;
    } else {
        window = (int32_t)(src - start);
    }

    int32_t pos = window;
    c = utf8_prevCharSafeBody(start, 0, &pos, c, -1);
    int32_t bytesRead = window - pos;  // Number of bytes read backward from src.

    int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
    return (idx << 3) | bytesRead;
}
namespace {

/**
 * Reads one value from the data array, selecting the array element type
 * according to valueWidth.
 *
 * @return the value at dataIndex, or 0xffffffff for an unknown valueWidth
 *         (unreachable if the trie is properly initialized)
 */
inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
        return data.ptr16[dataIndex];
    }
    if (valueWidth == UCPTRIE_VALUE_BITS_32) {
        return data.ptr32[dataIndex];
    }
    if (valueWidth == UCPTRIE_VALUE_BITS_8) {
        return data.ptr8[dataIndex];
    }
    // Unreachable if the trie is properly initialized.
    return 0xffffffff;
}

}  // namespace
/**
 * Returns the trie value for code point c.
 *
 * ASCII code points index the data array directly; everything else goes
 * through _UCPTRIE_CP_INDEX() with the fast-range limit that matches the
 * trie type.
 *
 * @param trie the trie to look up in
 * @param c    the code point
 * @return the value read from the data array
 */
U_CAPI uint32_t U_EXPORT2
ucptrie_get(const UCPTrie *trie, UChar32 c) {
    // linear ASCII: the data index is the code point itself
    int32_t dataIndex = c;
    if ((uint32_t)c > 0x7f) {
        UChar32 fastMax;
        if (trie->type == UCPTRIE_TYPE_FAST) {
            fastMax = 0xffff;
        } else {
            fastMax = UCPTRIE_SMALL_MAX;
        }
        dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
    }
    return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex);
}
namespace {
constexpr int32_t MAX_UNICODE = 0x10ffff;
/**
 * Maps a raw trie value to the value used for range comparison.
 *
 * A value equal to trieNullValue is replaced by nullValue without calling the
 * filter; any other value is passed through the filter when one is given.
 */
inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
                                 UCPMapValueFilter *filter, const void *context) {
    if (value == trieNullValue) {
        return nullValue;
    }
    if (filter == nullptr) {
        return value;
    }
    return filter(context, value);
}
/**
 * Finds the end of the range of code points that begins at start and whose
 * (optionally filtered) values are all the same.
 *
 * @param t       the trie, passed as const void * and cast to const UCPTrie *
 * @param start   first code point of the range
 * @param filter  optional function applied to values before comparison; may be nullptr
 * @param context opaque pointer handed to filter
 * @param pValue  if not nullptr, receives the value of the range
 * @return the last code point of the range, or U_SENTINEL if start is outside 0..0x10ffff
 */
UChar32 getRange(const void *t, UChar32 start,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
// The unsigned comparison rejects negative start values as well.
if ((uint32_t)start > MAX_UNICODE) {
return U_SENTINEL;
}
const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
// From highStart on there is only the single high value, so the range runs to U+10FFFF.
if (start >= trie->highStart) {
if (pValue != nullptr) {
int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
uint32_t value = getValue(trie->data, valueWidth, di);
if (filter != nullptr) { value = filter(context, value); }
*pValue = value;
}
return MAX_UNICODE;
}
// Filter the null value once up front; maybeFilterValue() substitutes this result
// for every raw value equal to trie->nullValue instead of calling the filter again.
uint32_t nullValue = trie->nullValue;
if (filter != nullptr) { nullValue = filter(context, nullValue); }
const uint16_t *index = trie->index;
// Most recently visited index-3 block and data block; -1 means none yet.
int32_t prevI3Block = -1;
int32_t prevBlock = -1;
UChar32 c = start;
// value is only meaningful once haveValue is true.
uint32_t value;
bool haveValue = false;
// Outer loop: one iteration per index-3 block (or per fast-index table).
// Note that "continue" jumps to the loop condition at the bottom.
do {
int32_t i3Block;
int32_t i3;
int32_t i3BlockLength;
int32_t dataBlockLength;
if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
// Fast range: treat the fast-index table at the start of the index array
// as one long "index-3 block" with fast-sized data blocks.
i3Block = 0;
i3 = c >> UCPTRIE_FAST_SHIFT;
i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
} else {
// Use the multi-stage index.
int32_t i1 = c >> UCPTRIE_SHIFT_1;
if (trie->type == UCPTRIE_TYPE_FAST) {
U_ASSERT(0xffff < c && c < trie->highStart);
i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
} else {
U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
i1 += UCPTRIE_SMALL_INDEX_LENGTH;
}
i3Block = trie->index[
(int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
// The distance check prevents skipping when fewer code points than one full
// index-2 entry have been covered since start.
if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
// The index-3 block is the same as the previous one, and filled with value.
U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
continue;
}
prevI3Block = i3Block;
if (i3Block == trie->index3NullOffset) {
// This is the index-3 null block.
if (haveValue) {
if (nullValue != value) {
return c - 1;
}
} else {
value = nullValue;
if (pValue != nullptr) { *pValue = nullValue; }
haveValue = true;
}
prevBlock = trie->dataNullOffset;
// Advance to the first code point of the next index-2 entry.
c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
continue;
}
i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
}
// Enumerate data blocks for one index-3 block.
do {
int32_t block;
if ((i3Block & 0x8000) == 0) {
block = index[i3Block + i3];
} else {
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
int32_t gi = i3 & 7;
block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000;
block |= index[group + gi];
}
// Same distance check as above, at data-block granularity.
if (block == prevBlock && (c - start) >= dataBlockLength) {
// The block is the same as the previous one, and filled with value.
U_ASSERT((c & (dataBlockLength - 1)) == 0);
c += dataBlockLength;
} else {
int32_t dataMask = dataBlockLength - 1;
prevBlock = block;
if (block == trie->dataNullOffset) {
// This is the data null block.
if (haveValue) {
if (nullValue != value) {
return c - 1;
}
} else {
value = nullValue;
if (pValue != nullptr) { *pValue = nullValue; }
haveValue = true;
}
// Advance to the first code point of the next data block.
c = (c + dataBlockLength) & ~dataMask;
} else {
// Ordinary data block: check the value at c, then every remaining
// value up to the end of this block.
int32_t di = block + (c & dataMask);
uint32_t value2 = getValue(trie->data, valueWidth, di);
value2 = maybeFilterValue(value2, trie->nullValue, nullValue,
filter, context);
if (haveValue) {
if (value2 != value) {
return c - 1;
}
} else {
value = value2;
if (pValue != nullptr) { *pValue = value; }
haveValue = true;
}
while ((++c & dataMask) != 0) {
if (maybeFilterValue(getValue(trie->data, valueWidth, ++di),
trie->nullValue, nullValue,
filter, context) != value) {
return c - 1;
}
}
}
}
} while (++i3 < i3BlockLength);
} while (c < trie->highStart);
// Everything below highStart had the same value. The range extends to U+10FFFF
// only if the (filtered) high value matches as well.
U_ASSERT(haveValue);
int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
uint32_t highValue = getValue(trie->data, valueWidth, di);
if (maybeFilterValue(highValue, trie->nullValue, nullValue,
filter, context) != value) {
return c - 1;
} else {
return MAX_UNICODE;
}
}
} // namespace
/**
 * Range lookup with optional special treatment of surrogate code points.
 *
 * With UCPMAP_RANGE_NORMAL this simply forwards to getRange. With any other
 * option, the surrogates U+D800..surrEnd are reported as if they all had
 * surrogateValue, where surrEnd is 0xdfff for UCPMAP_RANGE_FIXED_ALL_SURROGATES
 * and 0xdbff otherwise. Ranges are split or merged around that block accordingly.
 *
 * @param getRange       function that returns plain same-value ranges
 * @param trie           opaque trie/map pointer handed to getRange
 * @param start          first code point of the range
 * @param option         how surrogates are to be treated
 * @param surrogateValue value to report for the affected surrogates
 * @param filter         optional value filter handed to getRange
 * @param context        opaque pointer handed to getRange for the filter
 * @param pValue         if not nullptr, receives the value of the range
 * @return the range end as returned by getRange, or adjusted at the surrogate boundaries
 */
U_CFUNC UChar32
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
                         const void *trie, UChar32 start,
                         UCPMapRangeOption option, uint32_t surrogateValue,
                         UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    if (option == UCPMAP_RANGE_NORMAL) {
        return getRange(trie, start, filter, context, pValue);
    }

    // We need to examine the range value even if the caller does not want it.
    uint32_t scratchValue;
    if (pValue == nullptr) {
        pValue = &scratchValue;
    }

    UChar32 lastSurrogate;
    if (option == UCPMAP_RANGE_FIXED_ALL_SURROGATES) {
        lastSurrogate = 0xdfff;
    } else {
        lastSurrogate = 0xdbff;
    }

    const UChar32 rangeEnd = getRange(trie, start, filter, context, pValue);
    if (rangeEnd < 0xd7ff || start > lastSurrogate) {
        // The range neither touches the surrogates nor ends just before the first one.
        return rangeEnd;
    }

    // The range overlaps with surrogates, or ends just before the first one.
    if (*pValue != surrogateValue) {
        if (start <= 0xd7ff) {
            return 0xd7ff;  // Non-surrogateValue range ends before surrogateValue surrogates.
        }
        // Start is a surrogate with a non-surrogateValue code *unit* value.
        // Return a surrogateValue code *point* range.
        *pValue = surrogateValue;
        if (rangeEnd > lastSurrogate) {
            return lastSurrogate;  // Surrogate range ends before non-surrogateValue rest of range.
        }
    } else if (rangeEnd >= lastSurrogate) {
        // Surrogates followed by a non-surrogateValue range,
        // or surrogates are part of a larger surrogateValue range.
        return rangeEnd;
    }

    // See if the surrogateValue surrogate range can be merged with
    // an immediately following range.
    uint32_t followingValue;
    const UChar32 followingEnd =
        getRange(trie, lastSurrogate + 1, filter, context, &followingValue);
    return followingValue == surrogateValue ? followingEnd : lastSurrogate;
}
/**
 * Public range lookup for a UCPTrie: delegates to ucptrie_internalGetRange()
 * with this file's trie-walking getRange() as the plain range function.
 *
 * @return the end of the same-value range starting at start; see
 *         ucptrie_internalGetRange() for the meaning of the parameters
 */
U_CAPI UChar32 U_EXPORT2
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
                 UCPMapRangeOption option, uint32_t surrogateValue,
                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    return ucptrie_internalGetRange(
        getRange, trie, start, option, surrogateValue, filter, context, pValue);
}
/**
 * Serializes the trie as a UCPTrieHeader followed by the index array and the
 * data array.
 *
 * @param trie       the trie to serialize
 * @param data       destination buffer; may be nullptr only when capacity is 0.
 *                   A non-empty buffer must pass the U_POINTER_MASK_LSB(data, 3) == 0 check.
 * @param capacity   size of the destination buffer in bytes; must not be negative
 * @param pErrorCode in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for a bad
 *                   trie type, value width or buffer, and to U_BUFFER_OVERFLOW_ERROR
 *                   when capacity is smaller than the serialized length.
 * @return the serialized length in bytes (also returned on buffer overflow, so a
 *         capacity of 0 can be used to ask for the required size);
 *         0 if *pErrorCode already indicated a failure or the arguments are illegal
 */
U_CAPI int32_t U_EXPORT2
ucptrie_toBinary(const UCPTrie *trie,
                 void *data, int32_t capacity,
                 UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }

    const UCPTrieType type = (UCPTrieType)trie->type;
    const UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;

    const bool badType = type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type;
    const bool badWidth =
        valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth;
    const bool badBuffer =
        capacity < 0 ||
        (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)));
    if (badType || badWidth || badBuffer) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Sizes of the two arrays in bytes; index entries are 16 bits wide.
    const int32_t indexBytes = trie->indexLength * 2;
    int32_t dataBytes = 0;
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        dataBytes = trie->dataLength * 2;
        break;
    case UCPTRIE_VALUE_BITS_32:
        dataBytes = trie->dataLength * 4;
        break;
    case UCPTRIE_VALUE_BITS_8:
        dataBytes = trie->dataLength;
        break;
    default:
        // unreachable
        break;
    }

    const int32_t length = (int32_t)sizeof(UCPTrieHeader) + indexBytes + dataBytes;
    if (capacity < length) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }

    // Header. dataLength and dataNullOffset are wider than 16 bits:
    // their bits 19..16 are packed into the options field.
    UCPTrieHeader *header = (UCPTrieHeader *)data;
    header->signature = UCPTRIE_SIG;  // "Tri3"
    header->options = (uint16_t)(
        ((trie->dataLength & 0xf0000) >> 4) |
        ((trie->dataNullOffset & 0xf0000) >> 8) |
        (trie->type << 6) |
        valueWidth);
    header->indexLength = (uint16_t)trie->indexLength;
    header->dataLength = (uint16_t)trie->dataLength;
    header->index3NullOffset = trie->index3NullOffset;
    header->dataNullOffset = (uint16_t)trie->dataNullOffset;
    header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;

    // Index array directly after the header.
    char *bytes = (char *)data + sizeof(UCPTrieHeader);
    uprv_memcpy(bytes, trie->index, trie->indexLength * 2);
    bytes += indexBytes;

    // Data array directly after the index array.
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2);
        break;
    case UCPTRIE_VALUE_BITS_32:
        uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4);
        break;
    case UCPTRIE_VALUE_BITS_8:
        uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength);
        break;
    default:
        // unreachable
        break;
    }
    return length;
}
namespace {

#ifdef UCPTRIE_DEBUG
/** Debug helper: counts the data array entries that equal the trie's null value. */
long countNull(const UCPTrie *trie) {
    const uint32_t target = trie->nullValue;
    const int32_t limit = trie->dataLength;
    long matches = 0;
    int32_t i;
    switch (trie->valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        for (i = 0; i < limit; ++i) {
            matches += (trie->data.ptr16[i] == target) ? 1 : 0;
        }
        break;
    case UCPTRIE_VALUE_BITS_32:
        for (i = 0; i < limit; ++i) {
            matches += (trie->data.ptr32[i] == target) ? 1 : 0;
        }
        break;
    case UCPTRIE_VALUE_BITS_8:
        for (i = 0; i < limit; ++i) {
            matches += (trie->data.ptr8[i] == target) ? 1 : 0;
        }
        break;
    default:
        // unreachable
        break;
    }
    return matches;
}

/**
 * Debug helper: prints the index length, data length, number of null-valued
 * data entries and total serialized size of the trie to stdout.
 *
 * @param trie  the trie to describe
 * @param which caller-supplied label printed in front of the trie's name
 */
U_CFUNC void
ucptrie_printLengths(const UCPTrie *trie, const char *which) {
    long bytesPerValue;
    if (trie->valueWidth == UCPTRIE_VALUE_BITS_16) {
        bytesPerValue = 2;
    } else if (trie->valueWidth == UCPTRIE_VALUE_BITS_32) {
        bytesPerValue = 4;
    } else {
        bytesPerValue = 1;
    }
    const long indexLength = trie->indexLength;
    const long dataLength = (long)trie->dataLength;
    const long totalLength =
        (long)sizeof(UCPTrieHeader) + indexLength * 2 + dataLength * bytesPerValue;
    printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
           which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
}
#endif

}  // namespace
// UCPMap ----
// Initially, this is the same as UCPTrie. This may well change.

/**
 * Returns the map value for code point c by treating the map as a UCPTrie.
 */
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_get(trie, c);
}
/**
 * Returns the end of the same-value range starting at start by treating the
 * map as a UCPTrie and forwarding all arguments to ucptrie_getRange().
 */
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
                UCPMapRangeOption option, uint32_t surrogateValue,
                UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_getRange(trie, start, option, surrogateValue, filter, context, pValue);
}

Просмотреть файл

@ -0,0 +1,289 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucptrie_impl.h (modified from utrie2_impl.h)
// created: 2017dec29 Markus W. Scherer
#ifndef __UCPTRIE_IMPL_H__
#define __UCPTRIE_IMPL_H__
#include "unicode/ucptrie.h"
#ifdef UCPTRIE_DEBUG
#include "unicode/umutablecptrie.h"
#endif
// UCPTrie signature values, in platform endianness and opposite endianness.
// The UCPTrie signature ASCII byte values spell "Tri3".
#define UCPTRIE_SIG 0x54726933
#define UCPTRIE_OE_SIG 0x33697254
/**
* Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
* @internal
*/
struct UCPTrieHeader {
/** "Tri3" in big-endian US-ASCII (0x54726933) */
uint32_t signature;
/**
* Options bit field:
* Bits 15..12: Data length bits 19..16.
* Bits 11..8: Data null block offset bits 19..16.
* Bits 7..6: UCPTrieType
* Bits 5..3: Reserved (0).
* Bits 2..0: UCPTrieValueWidth
*/
uint16_t options;
/** Total length of the index tables, counted in 16-bit index entries. */
uint16_t indexLength;
/** Data length bits 15..0, counted in data entries. Bits 19..16 are in options. */
uint16_t dataLength;
/** Index-3 null block offset, 0x7fff or 0xffff if none. */
uint16_t index3NullOffset;
/**
* Data null block offset bits 15..0. Bits 19..16 are in options.
* The combined 20-bit offset is 0xfffff (UCPTRIE_NO_DATA_NULL_OFFSET) if none.
*/
uint16_t dataNullOffset;
/**
* First code point of the single-value range ending with U+10ffff,
* rounded up and then shifted right by UCPTRIE_SHIFT_2.
*/
uint16_t shiftedHighStart;
};
/**
* Constants for use with UCPTrieHeader.options.
* @internal
*/
enum {
/** Bits 15..12 of the options: data length bits 19..16. */
UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,
/** Bits 11..8 of the options: data null block offset bits 19..16. */
UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00,
/** Bits 5..3 of the options: reserved, must be 0. */
UCPTRIE_OPTIONS_RESERVED_MASK = 0x38,
/** Bits 2..0 of the options: the UCPTrieValueWidth. */
UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7,
/**
* Value for index3NullOffset which indicates that there is no index-3 null block.
* Bit 15 is unused for this value because this bit is used if the index-3 contains
* 18-bit indexes.
*/
UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
/**
* Value for the combined 20-bit data null block offset which indicates that
* there is no data null block.
*/
UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
};
// Internal constants.
// The numeric values in the comments assume UCPTRIE_FAST_SHIFT=6.
enum {
/** The length of the BMP index table. 1024=0x400 */
UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
/** First code point beyond the fast-indexed range of a "small" trie. */
UCPTRIE_SMALL_LIMIT = 0x1000,
/** The length of the fast-index table of a "small" trie. 64=0x40 */
UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT,
/** Shift size for getting the index-3 table offset. */
UCPTRIE_SHIFT_3 = 4,
/** Shift size for getting the index-2 table offset. 9 */
UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
/** Shift size for getting the index-1 table offset. 14 */
UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
/**
* Difference between two shift sizes,
* for getting an index-2 offset from an index-3 offset. 5=9-4
*/
UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
/**
* Difference between two shift sizes,
* for getting an index-1 offset from an index-2 offset. 5=14-9
*/
UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
/**
* Number of index-1 entries for the BMP. (4)
* This part of the index-1 table is omitted from the serialized form.
*/
UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
/** Number of entries in an index-2 block. 32=0x20 */
UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
/** Mask for getting the lower bits for the in-index-2-block offset. 0x1f */
UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
/** Number of code points per index-2 table entry. 512=0x200 */
UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
/** Number of entries in an index-3 block. 32=0x20 */
UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
/** Mask for getting the lower bits for the in-index-3-block offset. 0x1f */
UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
/** Number of entries in a small data block. 16=0x10 */
UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
/** Mask for getting the lower bits for the in-small-data-block offset. 0xf */
UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
};
/**
 * Signature of a function that returns the end of the same-value range which
 * starts at the given code point, optionally filtering values and optionally
 * storing the range value in *pValue.
 * The trie is passed as const void * so that the implementation decides what
 * it points to.
 */
typedef UChar32
UCPTrieGetRange(const void *trie, UChar32 start,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
/**
 * Range lookup shared by range-returning APIs: calls the supplied getRange
 * function and, unless option is UCPMAP_RANGE_NORMAL, adjusts the result so
 * that surrogates are reported with surrogateValue.
 */
U_CFUNC UChar32
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
const void *trie, UChar32 start,
UCPMapRangeOption option, uint32_t surrogateValue,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
#ifdef UCPTRIE_DEBUG
/** Debug only: prints the index/data/serialized lengths of the trie, labeled with which. */
U_CFUNC void
ucptrie_printLengths(const UCPTrie *trie, const char *which);
/** Debug only: declared here for the mutable trie builder; the definition is not in this header. */
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
#endif
/*
* Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
* For overview information see http://site.icu-project.org/design/struct/utrie
*
* The binary trie data should be 32-bit-aligned.
* The overall layout is:
*
* UCPTrieHeader header; -- 16 bytes, see struct definition above
* uint16_t index[header.indexLength];
* uintXY_t data[header.dataLength];
*
* The trie data array is an array of uint16_t, uint32_t, or uint8_t,
* specified via the UCPTrieValueWidth when building the trie.
* The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
* The overall length of the trie data is a multiple of 4 bytes.
* (Padding is added at the end of the index array and/or near the end of the data array as needed.)
*
* The length of the data array (dataLength) is stored as an integer split across two fields
* of the header struct (high bits in header.options).
*
* The trie type can be "fast" or "small" which determines the index structure,
* specified via the UCPTrieType when building the trie.
*
* The type and valueWidth are stored in the header.options.
* There are reserved type and valueWidth values, and reserved header.options bits.
* They could be used in future format extensions.
* Code reading the trie structure must fail with an error when unknown values or options are set.
*
* Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
*
* Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
* For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
* For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
*
* All code points in the range highStart..U+10FFFF map to a single highValue
* which is stored at the second-to-last position of the data array.
* (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
* The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
* (UCPTRIE_SHIFT_2=9)
*
* Values for code points fast_limit..highStart-1 are found via four-stage lookup.
* The data block size is smaller for this range than for the fast range.
* This together with more index stages with small blocks makes this range
* more easily compactable.
*
* There is also a trie error value stored at the last position of the data array.
* (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
* It is intended to be returned for inputs that are not Unicode code points
* (outside U+0000..U+10FFFF), or in string processing for ill-formed input
* (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
*
* For a "fast" trie:
*
* The index array starts with the BMP index table for BMP code point lookup.
* Its length is 1024=0x400.
*
* The supplementary index-1 table follows the BMP index table.
* Variable length, for code points up to highStart-1.
* Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
* (For 0x100000 supplementary code points U+10000..U+10ffff.)
*
* After this index-1 table follow the variable-length index-3 and index-2 tables.
*
* The supplementary index tables are omitted completely
* if there is only BMP data (highStart<=U+10000).
*
* For a "small" trie:
*
* The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
*
* The "supplementary" index tables are always stored.
* The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
*
* For both trie types:
*
* The last index-2 block may be a partial block, storing indexes only for code points
* below highStart.
*
* Lookup for ASCII code point c:
*
* Linear access from the start of the data array.
*
* value = data[c];
*
* Lookup for fast-range code point c:
*
* Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
* fetch the index array value at that offset,
* add the lower code point bits, index into the data array.
*
* value = data[index[c>>6] + (c&0x3f)];
*
* (This works for ASCII as well.)
*
* Lookup for small-range code point c below highStart:
*
* Split the code point into four bit fields using several sets of shifts & masks
* to read consecutive values from the index-1, index-2, index-3 and data tables.
*
* If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
* then the data block offsets are stored directly as uint16_t.
*
* Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
* has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
* an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
* in the additional word.
*
* See ucptrie_internalSmallIndex() for details.
*
* (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
*
* Compaction:
*
* Multiple code point ranges ("blocks") that are aligned on certain boundaries
* (determined by the shifting/bit fields of code points) and
* map to the same data values normally share a single subsequence of the data array.
* Data blocks can also overlap partially.
* (Depending on the builder code finding duplicate and overlapping blocks.)
*
* Iteration over same-value ranges:
*
* Range iteration (ucptrie_getRange()) walks the structure from a start code point
* until some code point is found that maps to a different value;
* the end of the returned range is just before that.
*
* The header.dataNullOffset (split across two header fields, high bits in header.options)
* is the offset of a widely shared data block filled with one single value.
* It helps quickly skip over large ranges of data with that value.
* The builder must ensure that if the start of any data block (fast or small)
* matches the dataNullOffset, then the whole block must be filled with the null value.
* Special care must be taken if there is no fast null data block
* but a small one, which is shorter, and it matches the *start* of some fast data block.
*
* Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
* where all index entries point to the dataNullOffset.
* If there is no such data or index-3 block, then these offsets are set to
* values that cannot be reached (data offset out of range/reserved index offset),
* normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
*/
#endif

Просмотреть файл

@ -1077,11 +1077,11 @@ collectCurrencyNames(const char* locale,
}
// currency plurals
UErrorCode ec3 = U_ZERO_ERROR;
UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3);
UErrorCode ec5 = U_ZERO_ERROR;
UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5);
n = ures_getSize(curr_p);
for (int32_t i=0; i<n; ++i) {
UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec3);
UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec5);
iso = (char*)ures_getKey(names);
// Using hash to remove duplicated ISO codes in fallback chain.
if (localeLevel == 0) {
@ -1099,7 +1099,7 @@ collectCurrencyNames(const char* locale,
for (int32_t j = 0; j < num; ++j) {
// TODO: remove duplicates between singular name and
// currency long name?
s = ures_getStringByIndex(names, j, &len, &ec3);
s = ures_getStringByIndex(names, j, &len, &ec5);
(*currencyNames)[*total_currency_name_count].IsoCode = iso;
UChar* upperName = toUpperCase(s, len, locale);
(*currencyNames)[*total_currency_name_count].currencyName = upperName;
@ -1449,7 +1449,7 @@ getCacheEntry(const char* locale, UErrorCode& ec) {
umtx_lock(&gCurrencyCacheMutex);
// in order to handle racing correctly,
// not putting 'search' in a separate function.
int8_t found = -1;
int8_t found = -1;
for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
if (currCache[i]!= NULL &&
uprv_strcmp(locale, currCache[i]->locale) == 0) {
@ -1469,7 +1469,6 @@ getCacheEntry(const char* locale, UErrorCode& ec) {
}
umtx_lock(&gCurrencyCacheMutex);
// check again.
int8_t found = -1;
for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
if (currCache[i]!= NULL &&
uprv_strcmp(locale, currCache[i]->locale) == 0) {

Просмотреть файл

@ -418,7 +418,8 @@ private:
const char *path; /* working path (u_icudata_Dir) */
const char *nextPath; /* path following this one */
const char *basename; /* item's basename (icudt22e_mt.res)*/
const char *suffix; /* item suffix (can be null) */
StringPiece suffix; /* item suffix (can be null) */
uint32_t basenameLen; /* length of basename */
@ -432,13 +433,15 @@ private:
};
/**
* @param iter The iterator to be initialized. Its current state does not matter.
* @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
* @param pkg Package which is being searched for, ex "icudt28l". Will ignore leave directories such as /icudt28l
* @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
* @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
* Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
* '/blarg/stuff.dat' would also be found.
* @param iter The iterator to be initialized. Its current state does not matter.
* @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
* @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
* @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
* @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
* Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
* '/blarg/stuff.dat' would also be found.
* Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
* the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
*/
UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
const char *item, const char *inSuffix, UBool doCheckLastFour,
@ -566,7 +569,7 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode)
if(checkLastFour == TRUE &&
(pathLen>=4) &&
uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix, 4)==0 && /* suffix matches */
uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
@ -602,8 +605,13 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode)
/* + basename */
pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
if(*suffix) /* tack on suffix */
if (!suffix.empty()) /* tack on suffix */
{
if (suffix.length() > 4) {
// If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
// then we need to ensure that the path ends with a separator.
pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
}
pathBuffer.append(suffix, *pErrorCode);
}
}
@ -751,16 +759,19 @@ openCommonData(const char *path, /* Path from OpenChoice? */
UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
{
#ifdef UDATA_DEBUG
fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
#endif
uprv_mapFile(&tData, pathBuffer);
uprv_mapFile(&tData, pathBuffer, pErrorCode);
#ifdef UDATA_DEBUG
fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
#endif
}
if (U_FAILURE(*pErrorCode)) {
return NULL;
}
#if defined(OS390_STUBDATA) && defined(OS390BATCH)
if (!UDataMemory_isLoaded(&tData)) {
@ -769,7 +780,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */
uprv_strncpy(ourPathBuffer, path, 1019);
ourPathBuffer[1019]=0;
uprv_strcat(ourPathBuffer, ".dat");
uprv_mapFile(&tData, ourPathBuffer);
uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
}
#endif
@ -860,7 +871,7 @@ static UBool extendICUData(UErrorCode *pErr)
umtx_unlock(&extendICUDataMutex);
#endif
return didUpdate; /* Return true if ICUData pointer was updated. */
/* (Could potentialy have been done by another thread racing */
/* (Could potentially have been done by another thread racing */
/* us through here, but that's fine, we still return true */
/* so that current thread will also examine extended data. */
}
@ -986,12 +997,12 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
/* init path iterator for individual files */
UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
while((pathBuffer = iter.next(pErrorCode)) != NULL)
while ((pathBuffer = iter.next(pErrorCode)) != NULL)
{
#ifdef UDATA_DEBUG
fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
#endif
if(uprv_mapFile(&dataMemory, pathBuffer))
if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
{
pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
if (pEntryData != NULL) {
@ -1007,7 +1018,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
return pEntryData;
}
/* the data is not acceptable, or some error occured. Either way, unmap the memory */
/* the data is not acceptable, or some error occurred. Either way, unmap the memory */
udata_close(&dataMemory);
/* If we had a nasty error, bail out completely. */
@ -1076,6 +1087,11 @@ static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName
}
}
}
// If we failed due to being out-of-memory, then stop early and report the error.
if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
*pErrorCode = *subErrorCode;
return NULL;
}
/* Data wasn't found. If we were looking for an ICUData item and there is
* more data available, load it and try again,
* otherwise break out of this loop. */
@ -1252,7 +1268,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
}
tocEntryPathSuffix = tocEntryPath.data()+tocEntrySuffixIndex; /* suffix starts here */
// The +1 is for the U_FILE_SEP_CHAR that is always appended above.
tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
#ifdef UDATA_DEBUG
fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());

Просмотреть файл

@ -333,6 +333,43 @@ uprv_compareInvEbcdic(const UDataSwapper *ds,
# error Unknown charset family!
#endif
// utrie_swap.cpp -----------------------------------------------------------***
/**
* Swaps a serialized UTrie.
* @internal
*/
U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
/**
* Swaps a serialized UTrie2.
* @internal
*/
U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
/**
* Swaps a serialized UCPTrie.
* @internal
*/
U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
/**
* Swaps a serialized UTrie, UTrie2, or UCPTrie.
* @internal
*/
U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
/* material... -------------------------------------------------------------- */

Просмотреть файл

@ -218,7 +218,7 @@ _uhash_allocate(UHashtable *hash,
U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
hash->primeIndex = primeIndex;
hash->primeIndex = static_cast<int8_t>(primeIndex);
hash->length = PRIMES[primeIndex];
p = hash->elements = (UHashElement*)
@ -860,13 +860,13 @@ uhash_hashUChars(const UHashTok key) {
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) {
const char *s = (const char *)key.pointer;
return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, uprv_strlen(s)));
return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
}
U_CAPI int32_t U_EXPORT2
uhash_hashIChars(const UHashTok key) {
const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
return s == NULL ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
}
U_CAPI UBool U_EXPORT2

Просмотреть файл

@ -53,22 +53,6 @@ uprv_isInvariantString(const char *s, int32_t length);
U_INTERNAL UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length);
#ifdef __cplusplus
/**
* Check if a UnicodeString only contains invariant characters.
* See utypes.h for details.
*
* @param s Input string.
* @return TRUE if s contains only invariant characters.
*/
U_INTERNAL inline UBool U_EXPORT2
uprv_isInvariantUnicodeString(const icu::UnicodeString &s) {
return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length());
}
#endif /* __cplusplus */
/**
* \def U_UPPER_ORDINAL
* Get the ordinal number of an uppercase invariant character

Просмотреть файл

@ -0,0 +1,722 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// file name: ulayout_props_data.h
//
// machine-generated by: icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp
#ifdef INCLUDED_FROM_UPROPS_CPP
static const int32_t maxInPCValue = 14;
static const uint16_t inpc_trieIndex[765]={
0,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x80,0xc0,0xff,0x13f,0x17e,0x1be,0x17e,0x1fe,0x23e,0x27e,0x2bc,0x2fc,
0x33c,0x37b,0x23e,0x3bb,0x3fb,0x439,0x477,0x4ad,0x4e1,0x521,0x531,0x571,0x599,0x5d9,0x619,0x656,
0x2b7,0x2c6,0x2d2,0x2c6,0x2ed,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0,0x10,0x20,
0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,
0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0x80,0x90,0xa0,
0xb0,0xc0,0xd0,0xe0,0xf0,0xff,0x10f,0x11f,0x12f,0x13f,0x14f,0x15f,0x16f,0x17e,0x18e,0x19e,
0x1ae,0x1be,0x1ce,0x1de,0x1ee,0x17e,0x18e,0x19e,0x1ae,0x1fe,0x20e,0x21e,0x22e,0x23e,0x24e,0x25e,
0x26e,0x27e,0x28e,0x29e,0x2ae,0x2bc,0x2cc,0x2dc,0x2ec,0x2fc,0x30c,0x31c,0x32c,0x33c,0x34c,0x35c,
0x36c,0x37b,0x38b,0x39b,0x3ab,0x23e,0x24e,0x25e,0x26e,0x3bb,0x3cb,0x3db,0x3eb,0x3fb,0x40b,0x41b,
0x42b,0x439,0x449,0x459,0x469,0x477,0x487,0x497,0x4a7,0x4ad,0x4bd,0x4cd,0x4dd,0x4e1,0x4f1,0x501,
0x511,0x521,0x531,0x541,0x551,0x531,0x541,0x551,0x561,0x571,0x581,0x591,0x5a1,0x599,0x5a9,0x5b9,
0x5c9,0x5d9,0x5e9,0x5f9,0x609,0x619,0x629,0x639,0x649,0x656,0x666,0x676,0x686,0,0,0x68b,
0x69a,0,0x6a9,0x6b8,0x6c7,0x6d5,0x6e5,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0x6f3,0,0x6f3,
0,0x701,0,0x701,0,0,0,0x70b,0x71b,0x729,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x739,0x749,0,0,
0,0,0,0,0,0x759,0x768,0,0,0,0x772,0,0,0,0x77e,0x78d,
0x79b,0,0,0,0,0,0,0,0,0x7ab,0,0,0x7b7,0x7c7,0,0x7cc,
0x52c,0x81,0,0x7dc,0,0,0,0x7ea,0x3fb,0,0,0x7fa,0x807,0,0,0,
0,0,0,0,0,0,0x817,0x827,0x835,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x2b3,0x83f,0,0x84c,0,0,0,0,
0,0x101,0,0,0x858,0x864,0,0x874,0x882,0,0,0x892,0,0x8a0,0x3fb,0,
0,0x80,0,0,0x8b0,0x8c0,0,0x2b9,0,0,0x8c7,0x8d6,0x8e3,0,0,0x8f1,
0,0,0,0x901,0x2bd,0,0x911,0x151,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x921,0,0x930,0,0,0x940,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x950,0,0,0x958,0x966,0,0,0,
0x81,0,0,0x976,0,0,0,0,0x52d,0,0x981,0x991,0x3cb,0,0,0x659,
0x81,0,0,0x99e,0x9ae,0,0,0,0x9bb,0x9cb,0,0,0,0,0,0,
0,0,0,0x71,0x9db,0,0xff,0,0,0x9e6,0x9f6,0x14f,0xa04,0x52b,0,0,
0,0,0,0,0,0,0x99c,0xa14,0x16f,0,0,0,0,0,0xa24,0xa33,
0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,0xa43,0xe3,
0x214,0,0,0,0xa53,0x2be,0,0,0,0,0,0xa63,0xa73,0,0,0,
0,0,0xa7b,0xa8b,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa97,0xaa6,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xab5,
0,0,0xac2,0,0xad1,0,0,0xadd,0xae7,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,
0xaf7,0,0,0,0,0,0xb07,0xb0f,0xb1e,0,0,0,0,0,0,0,
0xb2d,0xb3c,0,0,0,0xb44,0xb54,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb61,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x45,0x4d,0x4d,0x4d,0x5d,0x7d,0x9d,0xbd,0xdd,
2,2,0xec,0x10a,0x129,0x149,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,0x169,0x188,2,2,2,2,2,2,2,2,
2,2,0x1a8,2,2,0x1c8,0x1e6,0x203,0x221,0x23f,0x25f,0x27d,0x297
};
static const uint8_t inpc_trieData[2930]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,8,8,7,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,8,7,1,0,7,4,
7,1,1,1,1,8,8,8,8,7,7,7,7,1,4,7,
0,8,1,8,8,8,1,1,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,
1,1,1,1,0,0,4,4,0,0,5,5,1,0,0,0,
0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,8,
8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,1,
1,0,0,0,0,8,8,0,0,8,8,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,8,
0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,1,
1,1,1,8,0,8,8,0xd,0,7,7,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,8,8,8,8,8,8,0,8,
7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,7,8,7,1,
1,1,1,0,0,4,0xb,0,0,5,0xc,1,0,0,0,0,
0,0,0,0,8,0xd,0,0,0,0,0,0,0,0,0,0,
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,7,7,8,7,7,0,
0,0,4,4,4,0,5,5,5,8,0,0,0,0,0,0,
0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,8,7,7,7,
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,8,8,7,7,7,7,
0,8,8,9,0,8,8,8,8,0,0,0,0,0,0,0,
8,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xd,7,7,7,7,
0,8,0xd,0xd,0,0xd,0xd,8,8,0,0,0,0,0,0,0,
7,7,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,8,8,7,7,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,8,8,0,7,7,7,1,1,0,4,
4,4,0,5,5,5,8,0,0,0,0,0,0,0,0,0,
7,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,8,0,0,0,0,7,7,7,8,
8,1,0,1,0,7,4,0xb,4,5,0xc,5,7,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,7,8,7,7,8,8,8,8,1,1,1,0,0,0,0,
0,0xe,0xe,0xe,0xe,0xe,7,0,8,8,8,8,8,8,8,8,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,7,8,7,7,8,8,8,8,1,1,0,8,1,0,0,
0,0xe,0xe,0xe,0xe,0xe,0,0,0,8,8,8,8,8,8,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
1,0,8,0,0,0,0,7,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,8,9,1,1,9,
9,9,9,8,8,8,8,8,7,8,9,8,8,1,0,8,
8,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,
1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,7,7,8,8,1,4,8,8,8,8,
8,1,7,0,8,7,0,1,1,0,0,0,0,0,0,7,
7,1,1,0,0,0,0,1,1,0,7,7,7,0,0,7,
7,7,7,7,7,7,0,0,8,8,8,8,0,0,0,0,
0,0,0,0,0,0,0,1,7,4,8,8,7,7,7,7,
7,7,1,0,7,0,0,0,0,0,0,0,0,0,0,7,
7,7,8,0,0,8,1,1,0,0,0,0,0,0,0,0,
0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0,
0,7,8,8,8,8,1,1,1,0xb,0xc,5,4,4,4,5,
5,8,7,7,8,8,8,8,8,8,8,0,8,0,0,0,
0,0,0,0,0,0,8,0,0,8,8,1,7,7,0xd,0xd,
8,8,7,7,7,0,0,0,0,7,7,1,7,7,7,7,
7,7,1,8,1,0,0,0,0,7,7,7,7,7,0xe,0xe,
0xe,7,7,0xe,7,7,7,7,7,0,0,0,0,0,0,0,
7,7,0,0,0,0,0,0,0,8,1,4,7,8,0,0,
0,0,0,4,1,7,8,8,8,1,1,1,1,0,7,8,
7,7,8,8,8,8,1,1,8,1,7,4,4,4,8,8,
8,8,8,8,8,8,8,8,0,0,1,8,8,8,8,7,
0,0,0,0,0,0,0,0,0,0,0,8,7,8,8,1,
1,1,3,9,0xa,4,4,5,5,8,0xd,7,0,0,0,0,
0,0,0,0,0,0,0,8,1,8,8,8,0,7,1,1,
8,1,4,7,8,8,7,0,1,1,0,0,0,0,0,0,
8,7,8,8,7,7,7,8,7,8,0,0,0,0,7,7,
7,4,4,0xb,7,7,1,8,8,8,8,4,4,8,1,0,
0,0,0,0,0,0,0,8,8,8,0,6,1,1,1,1,
1,8,8,1,1,1,1,8,7,6,6,6,6,6,6,6,
0,0,0,0,1,0,0,0,0,8,0,0,7,0,0,0,
0,0,0,0,0,8,0,0,0,0,8,0,0,0,0,7,
7,1,8,7,0,0,0,0,0,0,0,0,7,7,7,7,
7,7,7,7,7,7,7,7,1,8,0,0,0,0,0,0,
0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,
0,8,0,0,0,0,0,0,0,0,0,0,0,1,1,1,
0,0,0,0,0,0,0,1,1,1,8,1,1,1,1,8,
0,0,0,8,7,7,8,8,1,1,4,4,8,7,7,2,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,8,8,8,1,8,4,8,1,7,4,1,1,0,0,0,
0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,8,
7,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0,
0,8,7,8,8,1,0xe,0xe,8,8,0xe,7,0xe,0xe,7,8,
8,0,0,0,0,0,0,0,0,0,0,0,4,1,8,4,
7,0,0,0,7,7,8,7,7,1,7,7,0,7,1,0,
0,6,1,1,0,8,6,0,0,0,0,0,1,1,1,8,
0,0,0,0,0,0,0,0,8,1,1,0,0,0,0,0,
7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
8,8,8,8,1,1,1,1,8,8,8,8,8,0,0,0,
0,0,0,0,0,0,7,4,7,1,1,8,8,7,7,1,
1,0,0,0,0,0,0,0,8,8,8,1,1,4,8,9,
9,8,1,1,0,8,0,0,0,0,0,0,0,0,0,0,
0,7,4,7,1,1,1,1,1,1,8,8,8,0xd,7,0,
0,0,0,0,0,0,0,1,0,8,1,0,0,0,0,0,
0,0,0,0,0,0,0,7,7,7,1,8,8,0xd,0xd,8,
7,8,8,0,0,0,0,0,0,8,0,7,4,7,1,1,
8,8,8,8,1,1,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,7,7,8,7,7,7,7,0,0,4,4,0,
0,5,5,7,0,0,7,7,0,0,8,8,8,8,8,8,
8,0,0,0,7,7,1,8,8,7,1,0,0,0,0,0,
0,0,0,0,7,4,7,1,1,1,1,1,1,4,8,0xb,
5,7,5,8,7,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,4,7,1,1,1,1,0,0,4,0xb,5,0xc,8,
8,7,1,7,7,7,1,1,1,1,1,1,8,8,7,7,
8,7,1,0,0,0,0,0,0,0,0,0,0,0,8,7,
8,4,7,1,1,8,8,8,8,7,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,8,7,7,8,8,1,
1,4,8,1,8,8,8,0,0,0,0,0,0,0,0,0,
0,0,0,7,4,7,1,1,1,8,8,8,8,8,7,1,
1,0,0,0,0,0,8,1,1,8,8,8,8,8,8,1,
0,0,0,0,0,1,1,8,8,8,8,7,0,1,1,1,
1,0,8,1,1,8,8,8,7,7,1,1,1,0,0,0,
0,0,0,0,0,0,0,1,1,1,1,1,1,8,7,8,
0,0,0,0,0,0,0,8,8,1,1,1,1,1,0,8,
8,8,8,8,8,7,1,0,0,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,7,1,1,1,1,1,1,4,
1,8,7,8,8,0,0,0,0,0,0,0,0,0,8,8,
8,8,8,1,0,0,0,8,0,8,8,0,8,8,1,8,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,
7,7,7,0,8,8,0,7,7,8,7,0,0,0,0,0,
0,0,0,0,8,1,4,7,0,0,0,0,0,0,0,0,
0,0
};
static const UCPTrie inpc_trie={
inpc_trieIndex,
{ inpc_trieData },
765, 2930,
0x12000, 0x12,
1, 2,
0, 0,
0x2, 0x0,
0x0,
};
static const int32_t maxInSCValue = 35;
static const uint16_t insc_trieIndex[834]={
0,0x40,0x60,0x94,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0xd4,0x112,0x152,0x190,0x1cf,0x20d,0x24c,0x28a,0x2ca,0x308,0x346,0x384,
0x3c4,0x402,0x441,0x47f,0x4bf,0x4fd,0x53d,0x57d,0x5bc,0x5fc,0x63b,0x67b,0x69b,0x6db,0x71b,0x758,
0x2f8,0x30b,0x317,0x30b,0x332,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x60,0x70,0x80,
0x90,0x94,0xa4,0xb4,0xc4,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,
0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,
0x70,0x40,0x50,0x60,0x70,0xd4,0xe4,0xf4,0x104,0x112,0x122,0x132,0x142,0x152,0x162,0x172,
0x182,0x190,0x1a0,0x1b0,0x1c0,0x1cf,0x1df,0x1ef,0x1ff,0x20d,0x21d,0x22d,0x23d,0x24c,0x25c,0x26c,
0x27c,0x28a,0x29a,0x2aa,0x2ba,0x2ca,0x2da,0x2ea,0x2fa,0x308,0x318,0x328,0x338,0x346,0x356,0x366,
0x376,0x384,0x394,0x3a4,0x3b4,0x3c4,0x3d4,0x3e4,0x3f4,0x402,0x412,0x422,0x432,0x441,0x451,0x461,
0x471,0x47f,0x48f,0x49f,0x4af,0x4bf,0x4cf,0x4df,0x4ef,0x4fd,0x50d,0x51d,0x52d,0x53d,0x54d,0x55d,
0x56d,0x57d,0x58d,0x59d,0x5ad,0x5bc,0x5cc,0x5dc,0x5ec,0x5fc,0x60c,0x61c,0x62c,0x63b,0x64b,0x65b,
0x66b,0x67b,0x68b,0x69b,0x6ab,0x69b,0x6ab,0x6bb,0x6cb,0x6db,0x6eb,0x6fb,0x70b,0x71b,0x72b,0x73b,
0x74b,0x758,0x768,0x778,0x788,0xe9,0xe9,0x798,0x7a3,0x7b3,0x7c3,0x7d2,0x7e1,0x7ef,0x7ff,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x80f,0x81d,0xe6,0x81d,0xe6,0x82d,0x80f,0x83d,0xe9,0xe9,0x84d,
0x859,0x863,0x872,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x882,0x16c,0x892,0x8a2,0x22d,0xe9,0x8b2,0x8c2,0xe9,0xe9,0x374,0x8d2,
0x8e1,0x30,0x40,0x40,0xe9,0x8f1,0xe9,0xe9,0x901,0x90e,0x91e,0x92a,0x30,0x30,0x40,0x40,
0x40,0x40,0x40,0x40,0x93a,0xe6,0xe9,0x94a,0x956,0x30,0x40,0x40,0x966,0xe9,0x975,0x985,
0xe9,0xe9,0x995,0x9a5,0xe9,0xe9,0x9b5,0x9c2,0x9d2,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x9e2,0x9f0,0x9fe,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0xa08,0xa14,0xa24,0x40,0x40,0x40,0x40,0x40,0x75a,0xa32,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x74,0x40,0x40,0x40,0xa42,0xe9,0xa4f,
0x40,0xe9,0xa5f,0xa6d,0xa7c,0xd6,0xe7,0xe9,0xa8c,0xa98,0x30,0xaa8,0xab6,0xac6,0xe9,0xad4,
0xe9,0xae4,0xaf3,0x40,0x40,0xb03,0xe9,0xe9,0xb12,0x297,0x30,0xb22,0xb32,0xe3,0xe9,0x889,
0xb42,0xb52,0x30,0xe9,0xb61,0xe9,0xe9,0xe9,0xb71,0xb81,0x40,0xb91,0xba1,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xbb1,0xbc1,0xbce,0x30,0xbde,0xbee,0xe9,
0xbf8,0x31,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xc08,0xe6,0xe9,
0x88a,0xc18,0xc26,0xc30,0xc40,0xc50,0xe9,0xe9,0xc60,0x40,0x40,0x40,0x40,0xc70,0xe9,0x88b,
0xc80,0xc90,0xca0,0xe9,0xcad,0xd5,0xe8,0xe9,0xcbd,0xccd,0x30,0x6ba,0x35,0xe1,0x3eb,0x886,
0xcdd,0x40,0x40,0x40,0x40,0xced,0x16d,0xcfc,0xdf,0xe9,0xd0c,0xd1c,0x30,0xd2c,0x162,0x172,
0xd3c,0x308,0xd4c,0xd5c,0x9ed,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xe9,
0xd6c,0xd7a,0xd8a,0x40,0x40,0xd99,0xe9,0xe9,0x91f,0xda9,0x30,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xff,0xdb9,0xdc9,0xdd1,0x40,0x40,0xdb,0xe9,0xe9,
0xde1,0xdf1,0x30,0x40,0x40,0xdf,0xe9,0xe01,0xe0e,0x30,0x40,0x40,0x40,0xe9,0xe1e,0xe2e,
0xe3e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdf,0xe9,0x886,
0xe4e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xe5e,0xe9,0xe9,
0xe6b,0xe7b,0xe8b,0xe9,0xe9,0xe97,0xea1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xeb1,0xe9,0xff,
0xec1,0xed1,0x6bb,0xee1,0x555,0xe9,0xeef,0x72b,0xeff,0x40,0x40,0x40,0x40,0xf0f,0xe9,0xe9,
0xf1e,0xf2e,0x30,0xf3e,0xe9,0xf4a,0xf57,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0xe9,0xf67,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x45,0x55,0x55,0x55,0x65,0x85,0xa5,0xc5,
0xe5,4,4,0xf5,0x114,0x134,0x154,4,0x174,4,0x17d,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,0x19d,0x1bd,4,4,4,
4,4,4,4,4,4,4,0x1dd,4,4,0x1fd,0x21d,0x23d,0x25d,0x27d,0x29d,
0x2bd,0x2d8
};
static const uint8_t insc_trieData[3960]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x1c,0x1c,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x22,0x22,
0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,
0x22,0x22,0,4,4,0,0,0x22,0x22,0x22,5,5,5,5,5,5,
5,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0,0,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,
5,5,0xc,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,
0,0x23,0x23,0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,
5,5,5,0,5,0,0,0,5,5,5,5,0,0,0x17,1,
0x22,0x22,0x22,0x22,0x22,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,6,0,
0,0,0,0,0,0,0,0x22,0,0,0,0,5,5,0,5,
0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
5,5,0,0,0,0,0,0,0,0,0,0,2,0,0x1c,0,
2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0,0,0x23,0x23,
0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,
0,5,5,0,5,5,0,5,5,0,0,0x17,0,0x22,0x22,0x22,
0,0,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,0,0,0,4,0,
0,0,0,0,0,0,5,5,5,5,0,5,0,0,0,0,
0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,2,0x12,0xc,
0xc,0,0xb,0,0,0,0,0,0,0,0,0,0,2,2,0x20,
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0,0x23,
0x23,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5,
0,5,5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,
0,0x22,0x22,0x22,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x23,0x23,0x22,0x22,0,0,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,
0,0,0,5,4,4,4,0x17,0x17,0x17,0,2,2,0x20,0,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,0x23,0,0,0x23,0x23,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,0,5,5,5,5,5,5,5,0,5,5,0,5,
5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0,0,0x22,
0x22,0,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,0x22,0x22,
0,0,0,0,5,5,0,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,5,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0x15,0,0x23,0x23,0x23,0x23,0x23,
0x23,0,0,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,0,0,0,5,
5,0,5,0,5,5,0,0,0,5,5,0,0,0,5,5,
5,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
0,0,0,0,0x22,0x22,0x22,0,0,0,0x22,0x22,0x22,0,0x22,0x22,
0x22,0x1f,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,0x20,2,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
0x23,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
0,1,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,
0,0,0,0,0,0,0,0x22,0x22,0,5,5,5,0,0,0,
0,0,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,
0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,
5,5,5,5,5,0,5,5,5,5,5,0,0,0x17,1,0x22,
0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0,0,0,
0,0,0,0,0x22,0x22,0,0,0,0,0,0,0,5,0,0x23,
0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,
0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,
0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,1,0x22,0x22,0x22,
0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0xd,0,0,0,0,
0,6,6,6,0x22,0,0,0,0,0,0,0,0x23,0x23,0x23,0x22,
0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,
0,0,0,0,0,0,0,6,6,6,6,6,6,0,0,2,
0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
0x23,0x23,0x23,0x23,0,0,0,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
5,5,5,5,5,5,5,5,5,0,5,0,0,5,5,5,
5,5,5,5,0,0,0,0x1f,0,0,0,0,0x22,0x22,0x22,0x22,
0x22,0x22,0,0x22,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0,0,
0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0x22,
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,0,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0,0x22,0x22,0x22,0x22,
0x22,0x22,0,0x22,0x1e,0x1e,0x1e,0x1e,0xa,2,0x1a,0,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,0,5,
0,0,5,5,0,5,0,0,5,0,0,0,0,0,0,5,
5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,0,
5,0,5,0,0,5,5,0,5,5,0,0x22,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0,0x22,0xb,0xb,0,0,0x22,0x22,0x22,0x22,0x22,
0,0,0,0x1e,0x1e,0x1e,0x1e,0,2,0,0,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0,0,5,5,5,5,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,
0x1c,0,0x1c,0,0x17,0,0,0,0,0,0,5,5,5,5,5,
5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,0,0,0,0,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x22,2,2,0x1a,
1,0,0,8,8,8,8,8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xf,0xf,0xf,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0,0,0,0,0,0,0x1c,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,5,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,0x20,0x13,0x1a,0xb,0xb,
0xb,0xb,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0xc,0,
0,0xc,0,5,5,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,5,5,5,
5,0xb,0xb,5,0x22,0x1e,0x1e,5,5,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,
5,5,0x22,0x22,0x22,0x22,5,5,5,5,5,5,5,5,5,5,
5,0xb,0x22,0x22,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,5,0x1e,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1e,0x1e,0x22,0x22,0,0,0x23,
0x23,0x23,5,5,5,5,5,5,5,5,5,5,0,5,5,0x22,
0x22,0x1a,0,0,0,0,0,0,0,0,0,0,0,5,5,0x22,
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0x22,
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x1b,0x1b,0x1c,0x10,
0xa,0x1c,0x1c,0x1a,0x13,0x1c,0,0,0,0,0,0,0,0,1,0x1c,
0,0,0xc,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,0xf,0xf,0,0,
0,0,7,7,2,7,7,7,7,7,7,7,0x22,0x1c,0,0,
0,0,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,
0,0,0x1d,0x1d,0x1d,0x1d,0x1d,0,0,0,0,0,0,0,0,0,
0,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,
0x22,0x22,7,7,7,7,7,7,7,0x1e,0x1e,0,0,0,0,0,
0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,
0,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,0x23,
0x23,5,5,0xb,0xb,0xf,7,7,9,0xf,0xf,0xf,0xf,0,0x13,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,
0x1e,0x1e,0x1e,0x1e,0x1a,0x1c,0x1c,0,0,0x1c,2,2,2,0x10,0x20,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,0x17,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,5,5,5,5,5,
5,5,0,0,0,0,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
5,5,5,5,5,5,0xf,0xf,0xf,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,
0x13,0xf,0xf,5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,1,
5,5,5,7,7,5,5,5,5,0x23,0x23,0x17,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,7,7,0x1a,0x1a,0,0,0,0,0,0,0,
0,0,0,0,0,5,5,5,5,0xf,0xf,0x22,0x22,0x22,0x22,0x22,
0x22,0x22,7,7,7,7,2,2,0x1c,0x17,0,0,0,0,0,0,
0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,5,
5,5,4,4,4,0,4,4,4,4,4,4,4,4,4,4,
4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x20,0x20,4,0x11,0x11,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0x1c,0,0,0,0,0,0,0,0,0,0,0,0,
0x16,0x14,0,0,0xc,0xc,0xc,0xc,0xc,0,0,0,0,0,0,0,
0,0,0,0,0x1c,0x1c,0x1c,0,0,0,0,0,0,0,0,0,
0,0,0x23,0x23,0,0x23,0x23,0x23,0x1a,5,5,5,5,2,5,5,
5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,0,0,0,0,0,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,0x21,0x21,5,
5,5,5,0x21,0xf,0xf,5,5,5,5,5,5,5,0xf,5,2,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
0xb,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,2,2,0,0,0,0,0,0,
0,0,0,0,0x23,0x22,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
5,5,5,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x1e,
0x1e,0x1e,0,0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,7,7,7,0x1a,0,0,0,0,0,0,0,0,0,
0,0,0,2,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,5,5,5,0x23,
0x23,0x23,5,5,5,0x17,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,
0xb,0xb,5,5,5,5,5,0x22,0,5,5,5,5,5,5,5,
5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,5,5,5,5,
5,0,0x22,0x22,0x22,0xb,0xb,0xb,0xb,0,0,0,0,0,0,0,
0,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
0,0,5,5,5,0xc,0xc,0xc,0,0,0,5,0x1e,0x1e,0x1e,5,
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,
0x1e,0x1d,0x1e,0x1d,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x23,0x23,5,5,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,
0x22,0,0,0,0,0,0x20,0x13,0,0,0,0,0,0,0,0,
0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,
0x23,5,0x23,5,5,5,5,5,5,5,5,5,7,7,7,7,
7,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x1e,0x1a,0,0,5,0x22,
0x22,0x22,0,0x22,0x22,0,0,0,0,0,0x22,0x22,2,0x20,5,5,
5,5,0,5,5,5,0,5,5,5,5,5,5,5,0,0,
0x17,0x17,0x17,0,0,0,0,0x13,2,2,0x20,0x11,0x11,0x23,0x23,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0,
0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x19,
2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,
2,2,0x20,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,
0x22,0x22,0x22,0x13,0x1a,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0,0,0,0,5,0x22,0x22,0,0,0,0,0,0,0,0,0,
0x21,0x21,0x21,0x21,0x21,5,5,5,5,5,5,5,5,5,5,5,
0x17,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,1,0xe,
0xe,0,0,0,0,0,0x1c,0x17,0x22,0x22,0,0,0,0x22,0x22,0x22,
0x22,2,0x1f,0x17,0x12,0,0,0,0,0,0,4,0,0x23,0x23,0x23,
0x23,5,5,5,0,5,0,5,5,5,5,0,5,5,5,5,
5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,2,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0x17,0x1a,0,0,0,0,0,2,2,2,0x20,
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,5,0,5,5,
0,5,5,5,5,5,0,0x17,0x17,1,0x22,0x22,0,0,0,0,
0,0,0,0x22,0,0,0,0,0,0,2,2,0x23,0x23,0x22,0x22,
0,0,4,4,4,4,4,4,4,0,0,0,5,5,5,5,
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,2,0x20,
0x17,1,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0,0,0,0,0x1c,0,0x23,0x23,0x23,0x23,0x23,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,2,0x20,0x1f,0x17,1,0,0,
0,0,0,0,0,0,0,0,0,0x22,0x22,0x22,0x22,0x22,0x22,0,
0,0x22,0x22,0x22,0x22,2,2,0x20,0x1f,0x17,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x23,0x23,0x23,0x23,0x22,0x22,0,
0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,
0x1f,0x22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,5,5,5,5,5,5,5,5,5,5,5,2,0x20,0x22,0x22,
0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,0,0,0,5,5,
5,5,5,5,5,5,5,5,5,0,0,0xb,0xb,0xb,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,2,0x20,0x1f,0x17,0,0,0,0,0,0x23,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,5,0x1c,0x1a,
2,2,2,2,0x20,0xe,0xb,0xb,0xb,0xb,0xc,0,0,0,0,0,
0xc,0,0x13,0,0,0,0,0,0,0,0,0x23,0x22,0x22,0x22,0x22,
0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,0,0,0xe,0xe,0xe,
0xe,7,7,7,7,7,7,2,0x20,0x12,0x13,0,0,0,1,0,
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0x23,5,
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0x22,2,2,0x20,
0x1f,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,
0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x22,
0x22,0x22,0x22,0x22,2,2,0,0,0,0,0,0,0,0,0,0x23,
0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,5,5,5,5,0x22,
0x22,0x22,0x22,0x22,0x22,0,0,0,0x22,0,0x22,0x22,0,0x22,2,0x20,
0x17,0x22,0x1a,0x13,0xd,0xb,0,0,0,0,0,0,0,0,0x23,0x23,
0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,0x23,5,5,5,5,5,5,
5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,2,0x20,0x13,0,
0,0,0,0,0,0,0,5,5,0xc,0x22,0x22,0x22,0x22,0,0,
0,0,0,0,0,0,0,0
};
static const UCPTrie insc_trie={
insc_trieIndex,
{ insc_trieData },
834, 3960,
0x12000, 0x12,
1, 2,
0, 0,
0x4, 0x40,
0x0,
};
static const int32_t maxVoValue = 3;
static const uint16_t vo_trieIndex[1100]={
0,0x40,0x59,0x98,0,0,0,0,0,0,0,0xd0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x33b,0x355,0x363,0x379,0x399,0x3b7,0x3d2,0x3ec,0x355,0x355,0x355,0x40c,0x355,0x355,0x355,0x40c,
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x355,0x355,0x355,0x40c,
0x355,0x355,0x355,0x40c,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x59,0x69,0x79,0x89,
0x98,0xa8,0xb8,0xc8,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
0,0x10,0x20,0x30,0xd0,0xe0,0xf0,0x100,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x10f,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa9,0x96,0x11e,0x12c,0xae,0xaa,0,0,0,0,0,
0,0x103,0x13c,0,0x14c,0x158,0x166,0x10b,0x175,0x110,0x110,0x110,0x184,0,0,0,
0,0,0,0,0x72,0,0xf6,0,0,0,0,0,0,0,0,0,
0,0,0,0x190,0x110,0x198,0,0,0,0,0x103,0x110,0x115,0,0xec,0x1a8,
0x1b6,0x10e,0x110,0x110,0x1c6,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,
0x110,0x110,0x116,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x118,0x10a,0x110,0x1d2,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x10e,0x110,0,0,
0x116,0,0,0,0,0,0x108,0x110,0x1e2,0x114,0x110,0,0,0,0,0,
0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x1f1,0x1ff,0x110,0x20e,0x21d,
0x110,0x22a,0x110,0x237,0x246,0x256,0x110,0x22a,0x110,0x237,0x261,0x110,0x110,0x26e,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x27e,0x27e,0x27e,0x27e,
0x286,0x110,0x28e,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,0,
0,0,0,0x110,0,0x110,0x117,0x29b,0x2aa,0,0,0,0,0,0,0,
0,0,0x2ba,0x2c9,0x110,0x2d9,0x110,0x2e9,0x2f8,0,0,0,0,0,0,0,
0x308,0x318,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,
0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0,0,0,0,0,0,0,0,0x328,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x112,0x84,0x98,0xa8,0xa8,0xa8,
0xa8,0xa8,0xa8,0xc8,0xc,0xe8,0x100,0x115,0xc,0xc,0xc,0x134,0x153,0x172,0x191,0xc,
0x1ab,0xc,0x1cb,0x1eb,0x20b,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0xfb,0xc,0x243,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0xc,0xc,0xc,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0x262,0xc,0xc,0xc,0xc,0x282,0xc,0xc,
0xc,0xc,0xc,0x29c,0xc,0xc,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
0xc,0x223,0x223,0x2b9,0xc,0xc,0xc,0xc,0xc,0x223,0x100,0xc,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2bc,0x223,
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2da,0xf8,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0x223,0x2fa,0xc,0xc,0x223,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0x223,0x31a,0x223,0x223,0xc8,0x2b5,0xc,0xc,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x31b,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc
};
static const uint8_t vo_trieData[828]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,0,0,0,3,0,0,3,0,0,0,0,0,
0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
0,0,0,0,0,0,0,0,0,3,3,0,0,0,3,0,
0,0,0,3,3,3,0,0,0,0,0,0,3,0,3,3,
3,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,
3,3,3,3,3,3,0,0,0,0,0,3,3,0,3,3,
0,0,0,0,0,0,3,3,3,3,0,3,0,3,0,3,
0,0,0,0,3,0,0,0,0,0,3,3,3,3,3,3,
0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,
3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3,
3,1,1,3,0,0,0,0,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,
3,3,0,0,0,0,3,3,3,0,3,3,3,3,3,3,
3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,
0,0,3,3,0,3,3,3,3,3,3,3,3,3,3,3,
3,3,2,2,3,3,3,3,3,1,1,1,1,1,1,1,
1,3,3,1,1,1,1,1,1,1,1,1,1,1,1,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,
2,3,2,3,2,3,2,3,3,3,3,3,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,
3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,
3,2,2,3,3,3,1,2,3,2,3,2,3,2,3,2,
3,3,3,3,3,3,2,2,3,3,3,3,3,1,3,3,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,
3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,
3,3,3,0,1,1,1,1,1,1,3,3,3,0,0,0,
0,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,
3,3,1,1,3,3,2,0,2,3,3,3,3,3,3,3,
3,3,3,1,1,0,0,0,2,3,3,3,3,3,3,3,
3,3,3,3,1,3,1,3,1,3,3,3,3,3,3,3,
3,3,3,3,1,1,1,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,3,3,3,1,3,3,3,3,
0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,
3,0,0,0,3,3,0,0,2,2,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,0,0,0,0
};
/*
 * Trie descriptor that bundles the vo_trieIndex and vo_trieData arrays.
 * 828 equals the declared length of vo_trieData; 1100 is presumably the
 * length of vo_trieIndex (its declaration is not shown here - confirm).
 * NOTE(review): the remaining initializers are positional and follow the
 * field order of UCPTrie, which is declared elsewhere. This looks like
 * generated data, so verify against the generator before editing by hand.
 */
static const UCPTrie vo_trie={
vo_trieIndex,
{ vo_trieData },
1100, 828,
0x110000, 0x110,
1, 2,
0, 0,
0xc, 0x0,
0x0,
};
#endif // INCLUDED_FROM_UPROPS_CPP

Просмотреть файл

@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
}
keywordsLen += keywordList[i].keywordLen + 1;
if(valuesToo) {
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
}
keywordsLen += keywordList[i].valueLen;
@ -1133,7 +1133,7 @@ uloc_setKeywordValue(const char* keywordName,
keyValuePrefix = ';'; /* for any subsequent key-value pair */
updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
updatedKeysAndValues.append('=', *status);
updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
}
if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
/* append new entry at the end, it sorts later than existing entries */
@ -1500,7 +1500,7 @@ _deleteVariant(char* variants, int32_t variantsLen,
}
if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
(variantsLen == toDeleteLen ||
(flag=(variants[toDeleteLen] == '_'))))
(flag=(variants[toDeleteLen] == '_')) != 0))
{
int32_t d = toDeleteLen + (flag?1:0);
variantsLen -= d;
@ -2412,7 +2412,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult
/* eat spaces prior to semi */
for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
;
int32_t slen = ((t+1)-s);
int32_t slen = static_cast<int32_t>(((t+1)-s));
if(slen > ULOC_FULLNAME_CAPACITY) {
*status = U_BUFFER_OVERFLOW_ERROR;
return -1; // too big

Просмотреть файл

@ -228,7 +228,7 @@ initFromResourceBundle(UErrorCode& sts) {
// a timezone key uses a colon instead of a slash in the resource.
// e.g. America:Los_Angeles
if (uprv_strchr(legacyTypeId, ':') != NULL) {
int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
int32_t legacyTypeIdLen = static_cast<int32_t>(uprv_strlen(legacyTypeId));
char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
if (legacyTypeIdBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
@ -320,7 +320,7 @@ initFromResourceBundle(UErrorCode& sts) {
if (isTZ) {
// replace colon with slash if necessary
if (uprv_strchr(from, ':') != NULL) {
int32_t fromLen = uprv_strlen(from);
int32_t fromLen = static_cast<int32_t>(uprv_strlen(from));
char* fromBuf = (char*)uprv_malloc(fromLen + 1);
if (fromBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
@ -472,7 +472,6 @@ isSpecialTypeRgKeyValue(const char* val) {
p++;
}
return (subtagLen == 6);
return TRUE;
}
U_CFUNC const char*

Просмотреть файл

@ -12,11 +12,13 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
#include "uvector.h"
#include "uassert.h"
@ -77,19 +79,34 @@ static const char LOCALE_TYPE_YES[] = "yes";
#define LANG_UND_LEN 3
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
This table has 2 parts. The part for grandfathered tags is generated by the
following script from the IANA language tag registry.
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
egrep -A 7 'Type: grandfathered' | \
egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
tr 'A-Z' 'a-z'
The 2nd part is made of five ICU-specific entries. They're kept for
the backward compatibility for now, even though there are no preferred
values. They may have to be removed for the strict BCP 47 compliance.
*/
static const char* const GRANDFATHERED[] = {
/* grandfathered preferred */
"art-lojban", "jbo",
"cel-gaulish", "xtg-x-cel-gaulish",
"en-GB-oed", "en-GB-x-oed",
"en-gb-oed", "en-gb-oxendict",
"i-ami", "ami",
"i-bnn", "bnn",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-hak", "hak",
"i-klingon", "tlh",
"i-lux", "lb",
"i-mingo", "see-x-i-mingo",
"i-navajo", "nv",
"i-pwn", "pwn",
"i-tao", "tao",
@ -102,17 +119,175 @@ static const char* const GRANDFATHERED[] = {
"sgn-ch-de", "sgg",
"zh-guoyu", "cmn",
"zh-hakka", "hak",
"zh-min", "nan-x-zh-min",
"zh-min-nan", "nan",
"zh-xiang", "hsn",
NULL, NULL
// Grandfathered tags with no preferred value in the IANA
// registry. Kept for now for the backward compatibility
// because ICU has mapped them this way.
"cel-gaulish", "xtg-x-cel-gaulish",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-mingo", "see-x-i-mingo",
"zh-min", "nan-x-zh-min",
};
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
The table lists redundant tags with a preferred value in the IANA language tag registry.
It's generated with the following command:
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
tr 'A-Z' 'a-z'
In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
*/
// Flat list of (redundant tag, preferred tag) pairs: each even index holds a
// redundant tag and the odd index right after it holds the replacement.
// The lookup loop steps through the table two entries at a time, comparing
// REDUNDANT[i] against the start of the input with uprv_strnicmp() and
// substituting REDUNDANT[i + 1] on a match, so the entry count must stay even.
static const char* const REDUNDANT[] = {
// redundant preferred
"sgn-br", "bzs",
"sgn-co", "csn",
"sgn-de", "gsg",
"sgn-dk", "dsl",
"sgn-es", "ssp",
"sgn-fr", "fsl",
"sgn-gb", "bfi",
"sgn-gr", "gss",
"sgn-ie", "isg",
"sgn-it", "ise",
"sgn-jp", "jsl",
"sgn-mx", "mfs",
"sgn-ni", "ncs",
"sgn-nl", "dse",
"sgn-no", "nsl",
"sgn-pt", "psr",
"sgn-se", "swl",
"sgn-us", "ase",
"sgn-za", "sfs",
"zh-cmn", "cmn",
"zh-cmn-hans", "cmn-hans",
"zh-cmn-hant", "cmn-hant",
"zh-gan", "gan",
"zh-wuu", "wuu",
"zh-yue", "yue",
// variant tag with preferred value
"ja-latn-hepburn-heploc", "ja-latn-alalc97",
};
/*
Updated on 2018-09-12 from
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
grep -B1 'Preferred' | grep -v '^--' | \
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
Make sure that 2-letter language subtags come before 3-letter subtags.
*/
static const char DEPRECATEDLANGS[][4] = {
/* deprecated new */
"in", "id",
"iw", "he",
"ji", "yi",
"in", "id"
"jw", "jv",
"mo", "ro",
"aam", "aas",
"adp", "dz",
"aue", "ktz",
"ayx", "nun",
"bgm", "bcg",
"bjd", "drl",
"ccq", "rki",
"cjr", "mom",
"cka", "cmr",
"cmk", "xch",
"coy", "pij",
"cqu", "quh",
"drh", "khk",
"drw", "prs",
"gav", "dev",
"gfx", "vaj",
"ggn", "gvr",
"gti", "nyc",
"guv", "duz",
"hrr", "jal",
"ibi", "opa",
"ilw", "gal",
"jeg", "oyb",
"kgc", "tdf",
"kgh", "kml",
"koj", "kwv",
"krm", "bmf",
"ktr", "dtp",
"kvs", "gdj",
"kwq", "yam",
"kxe", "tvd",
"kzj", "dtp",
"kzt", "dtp",
"lii", "raq",
"lmm", "rmx",
"meg", "cir",
"mst", "mry",
"mwj", "vaj",
"myt", "mry",
"nad", "xny",
"ncp", "kdz",
"nnx", "ngv",
"nts", "pij",
"oun", "vaj",
"pcr", "adx",
"pmc", "huw",
"pmu", "phr",
"ppa", "bfy",
"ppr", "lcq",
"pry", "prt",
"puz", "pub",
"sca", "hle",
"skk", "oyb",
"tdu", "dtp",
"thc", "tpo",
"thx", "oyb",
"tie", "ras",
"tkk", "twm",
"tlw", "weo",
"tmp", "tyj",
"tne", "kak",
"tnf", "prs",
"tsf", "taj",
"uok", "ema",
"xba", "cax",
"xia", "acn",
"xkh", "waw",
"xsj", "suj",
"ybd", "rki",
"yma", "lrr",
"ymt", "mtm",
"yos", "zom",
"yuu", "yug",
};
/*
Updated on 2018-04-24 from
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
grep -B1 'Preferred' | \
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
*/
// Flat list of (deprecated region, replacement region) pairs. Each element is
// a char[3]: a two-letter code plus its terminating NUL.
// The region-append code scans the table two entries at a time, compares the
// input against DEPRECATEDREGIONS[i] and copies DEPRECATEDREGIONS[i + 1] over
// it on a match, so the entry count must stay even.
static const char DEPRECATEDREGIONS[][3] = {
/* deprecated new */
"BU", "MM",
"DD", "DE",
"FX", "FR",
"TP", "TL",
"YD", "YE",
"ZR", "CD",
};
/*
@ -172,6 +347,46 @@ static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
namespace {

// Owns the CharString objects it hands out so that callers never free them
// individually. Instances live on the stack only, which is why the class
// does not derive from UMemory.
class CharStringPool {
public:
    CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
    ~CharStringPool() = default;

    // Non-copyable: the pool holds raw owning pointers.
    CharStringPool(const CharStringPool&) = delete;
    CharStringPool& operator=(const CharStringPool&) = delete;

    // Returns a new, empty CharString registered with the pool, or nullptr
    // when the pool is already in a failed state, the allocation fails, or
    // the string cannot be added to the pool.
    icu::CharString* create() {
        icu::CharString* result = nullptr;
        if (!U_FAILURE(status)) {
            icu::CharString* const fresh = new icu::CharString;
            if (fresh == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
            } else {
                pool.addElement(fresh, status);
                if (U_FAILURE(status)) {
                    // The pool did not take the string; release it here.
                    delete fresh;
                } else {
                    result = fresh;
                }
            }
        }
        return result;
    }

private:
    // Element deleter handed to the UVector in the constructor.
    static void U_CALLCONV deleter(void* element) {
        delete static_cast<icu::CharString*>(element);
    }

    // Declared before `pool` because the constructor passes it to pool's
    // initializer; members are initialized in declaration order.
    UErrorCode status;
    icu::UVector pool;
};

}  // namespace
/*
* -------------------------------------------------
*
@ -675,6 +890,11 @@ _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac
} else {
/* resolve deprecated */
for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
// 2-letter deprecated subtags are listed before 3-letter
// ones in DEPRECATEDLANGS[]. If the input is a 2-letter code,
// leave the loop on reaching the first 3-letter subtag, since
// no later entry can match.
if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
len = (int32_t)uprv_strlen(buf);
@ -721,7 +941,6 @@ _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
*(appendAt + reslen) = SEP;
}
reslen++;
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
}
@ -763,6 +982,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
*(appendAt + reslen) = SEP;
}
reslen++;
/* resolve deprecated */
for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
len = (int32_t)uprv_strlen(buf);
break;
}
}
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
@ -900,7 +1127,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
UEnumeration *keywordEnum = NULL;
@ -920,22 +1146,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
AttributeListEntry *firstAttr = NULL;
AttributeListEntry *attr;
char *attrValue;
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char *pExtBuf = extBuf;
int32_t extBufCapacity = sizeof(extBuf);
CharStringPool extBufPool;
const char *bcpKey=nullptr, *bcpValue=nullptr;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isBcpUExt;
while (TRUE) {
icu::CharString buf;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
}
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
/* buf must be null-terminated */
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
char* buffer;
int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
for (;;) {
buffer = buf.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
break;
}
len = uloc_getKeywordValue(
localeID, key, buffer, resultCapacity, &tmpStatus);
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
break;
}
resultCapacity = len;
tmpStatus = U_ZERO_ERROR;
}
if (U_FAILURE(tmpStatus)) {
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -945,6 +1197,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
buf.append(buffer, len, tmpStatus);
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
}
keylen = (int32_t)uprv_strlen(key);
isBcpUExt = (keylen > 1);
@ -1007,7 +1264,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
/* we've checked buf is null-terminated above */
bcpValue = uloc_toUnicodeLocaleType(key, buf);
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@ -1015,33 +1272,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
continue;
}
if (bcpValue == buf) {
/*
if (bcpValue == buf.data()) {
/*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
the value to lower case
value to lower case
*/
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
bcpValue = pExtBuf;
pExtBuf += (bcpValueLen + 1);
extBufCapacity -= (bcpValueLen + 1);
} else {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
icu::CharString* extBuf = extBufPool.create();
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
int32_t resultCapacity;
char* pExtBuf = extBuf->getAppendBuffer(
/*minCapacity=*/bcpValueLen,
/*desiredCapacityHint=*/bcpValueLen,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
bcpValue = extBuf->data();
}
} else {
if (*key == PRIVATEUSE) {
if (!_isPrivateuseValueSubtags(buf, len)) {
if (!_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -1049,7 +1317,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
} else {
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -1058,20 +1326,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
bcpKey = key;
if ((len + 1) < extBufCapacity) {
uprv_memcpy(pExtBuf, buf, len);
bcpValue = pExtBuf;
pExtBuf += len;
*pExtBuf = 0;
pExtBuf++;
extBufCapacity -= (len + 1);
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
icu::CharString* extBuf = extBufPool.create();
if (extBuf == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
extBuf->append(buf.data(), len, tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
break;
}
bcpValue = extBuf->data();
}
/* create ExtensionListEntry */
@ -1242,6 +1507,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
attrBufIdx += (len + 1);
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
uprv_free(attr);
goto cleanup;
}
@ -1460,9 +1726,9 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
kwd->value = pType;
if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
// duplicate keyword is allowed, Only the first
// is honored.
uprv_free(kwd);
goto cleanup;
}
}
@ -1836,7 +2102,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
/* check if the tag is grandfathered */
for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
int32_t newTagLength;
@ -1858,6 +2124,37 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
}
size_t parsedLenDelta = 0;
if (grandfatheredLen == 0) {
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
const char* redundantTag = REDUNDANT[i];
size_t redundantTagLen = uprv_strlen(redundantTag);
// The preferred tag for a redundant tag is always shorter than the redundant
// tag. A redundant tag may or may not be followed by other subtags
// (e.g. "zh-yue" or "zh-yue-u-co-pinyin").
if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
const char* redundantTagEnd = tagBuf + redundantTagLen;
if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
const char* preferredTag = REDUNDANT[i + 1];
size_t preferredTagLen = uprv_strlen(preferredTag);
uprv_strncpy(t->buf, preferredTag, preferredTagLen);
if (*redundantTagEnd == SEP) {
uprv_memmove(tagBuf + preferredTagLen,
redundantTagEnd,
tagLen - redundantTagLen + 1);
} else {
tagBuf[preferredTagLen] = '\0';
}
// parsedLen should be the length of the input
// before redundantTag is replaced by preferredTag.
// Save the delta to add it back later.
parsedLenDelta = redundantTagLen - preferredTagLen;
break;
}
}
}
}
/*
* langtag = language
* ["-" script]
@ -1898,10 +2195,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
if (next & LANG) {
if (_isLanguageSubtag(pSubtag, subtagLen)) {
*pSep = 0; /* terminate */
// TODO: move deprecated language code handling here.
t->language = T_CString_toLowerCase(pSubtag);
pLastGoodPosition = pSep;
next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
next = SCRT | REGN | VART | EXTS | PRIV;
if (subtagLen <= 3)
next |= EXTL;
continue;
}
}
@ -1942,6 +2242,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
if (next & REGN) {
if (_isRegionSubtag(pSubtag, subtagLen)) {
*pSep = 0;
// TODO: move deprecated region code handling here.
t->region = T_CString_toUpperCase(pSubtag);
pLastGoodPosition = pSep;
@ -2035,7 +2336,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
}
if (next & PRIV) {
if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
char *pPrivuseVal;
if (pExtension != NULL) {
@ -2138,7 +2439,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
if (parsedLen != NULL) {
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
(int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
}
return t;
@ -2335,31 +2637,66 @@ uloc_toLanguageTag(const char* localeID,
int32_t langtagCapacity,
UBool strict,
UErrorCode* status) {
/* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
char canonical[256];
int32_t reslen = 0;
icu::CharString canonical;
int32_t reslen;
UErrorCode tmpStatus = U_ZERO_ERROR;
UBool hadPosix = FALSE;
const char* pKeywordStart;
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
canonical[0] = 0;
if (uprv_strlen(localeID) > 0) {
uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
if (tmpStatus != U_ZERO_ERROR) {
int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
if (resultCapacity > 0) {
char* buffer;
for (;;) {
buffer = canonical.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
tmpStatus);
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return 0;
}
reslen =
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
break;
}
resultCapacity = reslen;
tmpStatus = U_ZERO_ERROR;
}
if (U_FAILURE(tmpStatus)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
canonical.append(buffer, reslen, tmpStatus);
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
}
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return 0;
}
}
reslen = 0;
/* For handling special case - private use only tag */
pKeywordStart = locale_getKeywordsStart(canonical);
if (pKeywordStart == canonical) {
pKeywordStart = locale_getKeywordsStart(canonical.data());
if (pKeywordStart == canonical.data()) {
UEnumeration *kwdEnum;
int kwdCnt = 0;
UBool done = FALSE;
kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
if (kwdEnum != NULL) {
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
if (kwdCnt == 1) {
@ -2397,12 +2734,12 @@ uloc_toLanguageTag(const char* localeID,
}
}
reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
return reslen;
}
@ -2414,6 +2751,23 @@ uloc_forLanguageTag(const char* langtag,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* status) {
return ulocimp_forLanguageTag(
langtag,
-1,
localeID,
localeIDCapacity,
parsedLength,
status);
}
U_CAPI int32_t U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
char* localeID,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* status) {
ULanguageTag *lt;
int32_t reslen = 0;
const char *subtag, *p;
@ -2421,7 +2775,7 @@ uloc_forLanguageTag(const char* langtag,
int32_t i, n;
UBool noRegion = TRUE;
lt = ultag_parse(langtag, -1, parsedLength, status);
lt = ultag_parse(langtag, tagLen, parsedLength, status);
if (U_FAILURE(*status)) {
return 0;
}

Просмотреть файл

@ -61,6 +61,38 @@ ulocimp_getCountry(const char *localeID,
char *country, int32_t countryCapacity,
const char **pEnd);
/**
* Returns a locale ID for the specified BCP47 language tag string.
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
* <p>
* This implements the 'Language-Tag' production of BCP47, and so
* supports grandfathered (regular and irregular) as well as private
* use language tags. Private use tags are represented as 'x-whatever',
* and grandfathered tags are converted to their canonical replacements
* where they exist. Note that a few grandfathered tags have no modern
* replacement; these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
* @param langtag the input BCP47 language tag.
* @param tagLen the length of langtag, or -1 to call uprv_strlen().
* @param localeID the output buffer receiving a locale ID for the
* specified BCP47 language tag.
* @param localeIDCapacity the size of the locale ID output buffer.
* @param parsedLength if not NULL, successfully parsed length
* for the input language tag is set.
* @param err error information if receiving the locale ID
* failed.
* @return the length of the locale ID.
* @internal ICU 63
*/
U_CAPI int32_t U_EXPORT2
ulocimp_forLanguageTag(const char* langtag,
int32_t tagLen,
char* localeID,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* err);
/**
* Get the region to use for supplemental data lookup. Uses
* (1) any region specified by locale tag "rg"; if none then

Просмотреть файл

@ -22,6 +22,7 @@
#include "uposixdefs.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "udatamem.h"
#include "umapfile.h"
@ -64,7 +65,7 @@
# include "unicode/udata.h"
# define LIB_PREFIX "lib"
# define LIB_SUFFIX ".dll"
/* This is inconvienient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
/* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
# endif
#elif MAP_IMPLEMENTATION==MAP_STDIO
@ -84,7 +85,10 @@
*----------------------------------------------------------------------------*/
#if MAP_IMPLEMENTATION==MAP_NONE
U_CFUNC UBool
uprv_mapFile(UDataMemory *pData, const char *path) {
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
if (U_FAILURE(*status)) {
return FALSE;
}
UDataMemory_init(pData); /* Clear the output struct. */
return FALSE; /* no file access */
}
@ -97,12 +101,17 @@
uprv_mapFile(
UDataMemory *pData, /* Fill in with info on the result doing the mapping. */
/* Output only; any original contents are cleared. */
const char *path /* File path to be opened/mapped */
const char *path, /* File path to be opened/mapped. */
UErrorCode *status /* Error status, used to report out-of-memory errors. */
)
{
HANDLE map;
HANDLE file;
if (U_FAILURE(*status)) {
return FALSE;
}
UDataMemory_init(pData); /* Clear the output struct. */
/* open the input file */
@ -111,28 +120,29 @@
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL);
#else
// First we need to go from char to UTF-16
// u_UCharsToChars could work but it requires length.
WCHAR utf16Path[MAX_PATH];
int32_t i;
for (i = 0; i < UPRV_LENGTHOF(utf16Path); i++)
{
utf16Path[i] = path[i];
if (path[i] == '\0')
{
break;
}
// Convert from UTF-8 string to UTF-16 string.
wchar_t utf16Path[MAX_PATH];
int32_t pathUtf16Len = 0;
u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
if (U_FAILURE(*status)) {
return FALSE;
}
if (i >= UPRV_LENGTHOF(utf16Path))
{
// Ran out of room, unlikely but be safe
utf16Path[UPRV_LENGTHOF(utf16Path) - 1] = '\0';
if (*status == U_STRING_NOT_TERMINATED_WARNING) {
// Report back an error instead of a warning.
*status = U_BUFFER_OVERFLOW_ERROR;
return FALSE;
}
// TODO: Is it worth setting extended parameters to specify random access?
file = CreateFile2(utf16Path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL);
#endif
if(file==INVALID_HANDLE_VALUE) {
if (file == INVALID_HANDLE_VALUE) {
// If we failed to open the file due to an out-of-memory error, then we want
// to report that error back to the caller.
if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
return FALSE;
}
@ -165,7 +175,12 @@
map = CreateFileMappingFromApp(file, NULL, PAGE_READONLY, 0, NULL);
#endif
CloseHandle(file);
if(map==NULL) {
if (map == NULL) {
// If we failed to create the mapping due to an out-of-memory error, then
// we want to report that error back to the caller.
if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
return FALSE;
}
@ -193,12 +208,16 @@
#elif MAP_IMPLEMENTATION==MAP_POSIX
U_CFUNC UBool
uprv_mapFile(UDataMemory *pData, const char *path) {
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
int fd;
int length;
struct stat mystat;
void *data;
if (U_FAILURE(*status)) {
return FALSE;
}
UDataMemory_init(pData); /* Clear the output struct. */
/* determine the length of the file */
@ -221,6 +240,7 @@
#endif
close(fd); /* no longer needed */
if(data==MAP_FAILED) {
// Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
return FALSE;
}
@ -263,11 +283,15 @@
}
U_CFUNC UBool
uprv_mapFile(UDataMemory *pData, const char *path) {
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
FILE *file;
int32_t fileLength;
void *p;
if (U_FAILURE(*status)) {
return FALSE;
}
UDataMemory_init(pData); /* Clear the output struct. */
/* open the input file */
file=fopen(path, "rb");
@ -286,6 +310,7 @@
p=uprv_malloc(fileLength);
if(p==NULL) {
fclose(file);
*status = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
@ -351,7 +376,7 @@
*
* TODO: This works the way ICU historically has, but the
* whole data fallback search path is so complicated that
* proabably almost no one will ever really understand it,
* probably almost no one will ever really understand it,
* the potential for confusion is large. (It's not just
* this one function, but the whole scheme.)
*
@ -391,7 +416,7 @@
# define DATA_TYPE "dat"
U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path) {
U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
const char *inBasename;
char *basename;
char pathBuffer[1024];
@ -399,6 +424,10 @@
dllhandle *handle;
void *val=0;
if (U_FAILURE(*status)) {
return FALSE;
}
inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
if(inBasename==NULL) {
inBasename = path;
@ -430,6 +459,7 @@
data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd); /* no longer needed */
if(data==MAP_FAILED) {
// Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
return FALSE;
}
pData->map = (char *)data + length;

Просмотреть файл

@ -29,7 +29,7 @@
#include "unicode/udata.h"
#include "putilimp.h"
U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path);
U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status);
U_CFUNC void uprv_unmapFile(UDataMemory *pData);
/* MAP_NONE: no memory mapping, no file access at all */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -54,6 +54,21 @@ U_NAMESPACE_END
#include <atomic>
// Export an explicit template instantiation of std::atomic<int32_t>.
// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
#if defined(__clang__)
// Suppress the warning that the explicit instantiation after explicit specialization has no effect.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winstantiation-after-specialization"
#endif
template struct U_COMMON_API std::atomic<int32_t>;
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#endif
U_NAMESPACE_BEGIN
typedef std::atomic<int32_t> u_atomic_int32_t;
@ -205,7 +220,7 @@ umtx_atomic_dec(u_atomic_int32_t *p);
U_NAMESPACE_END
#endif /* Low Level Atomic Ops Platfrom Chain */
#endif /* Low Level Atomic Ops Platform Chain */
@ -319,7 +334,7 @@ U_NAMESPACE_END
*************************************************************************************************/
#if defined(U_USER_MUTEX_H)
// #inlcude "U_USER_MUTEX_H"
// #include "U_USER_MUTEX_H"
#include U_MUTEX_XSTR(U_USER_MUTEX_H)
#elif U_PLATFORM_USES_ONLY_WIN32_API
@ -389,7 +404,7 @@ struct UConditionVar {
#else
/*
* Unknow platform type.
* Unknown platform type.
* This is an error condition. ICU requires mutexes.
*/
@ -401,7 +416,7 @@ struct UConditionVar {
/**************************************************************************************
*
* Mutex Implementation function declaratations.
* Mutex Implementation function declarations.
* Declarations are platform neutral.
* Implementations, in umutex.cpp, are platform specific.
*

Просмотреть файл

@ -466,7 +466,7 @@ static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
}
buffer += ndigits;
length += ndigits;
length += static_cast<uint16_t>(ndigits);
WRITE_CHAR(buffer, bufferLength, length, '>');
return length;

Просмотреть файл

@ -237,13 +237,12 @@ class StringByteSink : public ByteSink {
* @stable ICU 4.2
*/
StringByteSink(StringClass* dest) : dest_(dest) { }
#ifndef U_HIDE_DRAFT_API
/**
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
*
* @param dest pointer to string object to append to
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
* @draft ICU 60
* @stable ICU 60
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
if (initialAppendCapacity > 0 &&
@ -251,7 +250,6 @@ class StringByteSink : public ByteSink {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
#endif // U_HIDE_DRAFT_API
/**
* Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes

Просмотреть файл

@ -194,7 +194,6 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
#ifndef U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
@ -214,7 +213,7 @@ public:
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToLower
* @draft ICU 60
* @stable ICU 60
*/
static void utf8ToLower(
const char *locale, uint32_t options,
@ -240,7 +239,7 @@ public:
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToUpper
* @draft ICU 60
* @stable ICU 60
*/
static void utf8ToUpper(
const char *locale, uint32_t options,
@ -280,7 +279,7 @@ public:
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToTitle
* @draft ICU 60
* @stable ICU 60
*/
static void utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
@ -311,13 +310,12 @@ public:
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8FoldCase
* @draft ICU 60
* @stable ICU 60
*/
static void utf8Fold(
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
/**
* Lowercases a UTF-8 string and optionally records edits.

Просмотреть файл

@ -28,6 +28,8 @@ U_NAMESPACE_BEGIN
// Use the predefined value.
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
#elif defined(U_IN_DOXYGEN)
# define U_ALIASING_BARRIER(ptr)
#endif
/**
@ -103,6 +105,7 @@ private:
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
@ -134,6 +137,7 @@ Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
@ -209,6 +213,7 @@ private:
#endif
};
/// \cond
#ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
@ -240,6 +245,7 @@ ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif
/// \endcond
/**
* Converts from const char16_t * to const UChar *.

Просмотреть файл

@ -88,6 +88,11 @@
* <td>icu::UnicodeSet</td>
* </tr>
* <tr>
* <td>Maps from Unicode Code Points to Integer Values</td>
* <td>ucptrie.h, umutablecptrie.h</td>
* <td>C API</td>
* </tr>
* <tr>
* <td>Maps from Strings to Integer Values</td>
* <td>(no C API)</td>
* <td>icu::BytesTrie, icu::UCharsTrie</td>
@ -208,9 +213,9 @@
* <td>C API</td>
* </tr>
* <tr>
* <td>Layout Engine/Complex Text Layout</td>
* <td>loengine.h</td>
* <td>icu::LayoutEngine,icu::ParagraphLayout</td>
* <td>Paragraph Layout / Complex Text Layout</td>
* <td>playout.h</td>
* <td>icu::ParagraphLayout</td>
* </tr>
* <tr>
* <td>ICU I/O</td>

Просмотреть файл

@ -24,8 +24,8 @@ class UnicodeString;
* in linear progression. Does not support moving/reordering of text.
*
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
* instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
* {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
* mapping between code points in the source and destination strings.
*
@ -62,11 +62,11 @@ class UnicodeString;
* </ul>
*
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
* {@link Edits::Iterator#next()} methods are called. They are identical to the non-change iterators when
* their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
* `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
* their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
* methods are used to walk through the string.
*
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
* For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
* UCharacterCaseTest.java.
*
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
@ -86,7 +86,7 @@ public:
/**
* Copy constructor.
* @param other source edits
* @draft ICU 60
* @stable ICU 60
*/
Edits(const Edits &other) :
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
@ -98,7 +98,7 @@ public:
* Move constructor, might leave src empty.
* This object will have the same contents that the source object had.
* @param src source edits
* @draft ICU 60
* @stable ICU 60
*/
Edits(Edits &&src) U_NOEXCEPT :
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
@ -117,7 +117,7 @@ public:
* Assignment operator.
* @param other source edits
* @return *this
* @draft ICU 60
* @stable ICU 60
*/
Edits &operator=(const Edits &other);
@ -127,7 +127,7 @@ public:
* The behavior is undefined if *this and src are the same object.
* @param src source edits
* @return *this
* @draft ICU 60
* @stable ICU 60
*/
Edits &operator=(Edits &&src) U_NOEXCEPT;
@ -173,13 +173,11 @@ public:
*/
UBool hasChanges() const { return numChanges != 0; }
#ifndef U_HIDE_DRAFT_API
/**
* @return the number of change edits
* @draft ICU 60
* @stable ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
#endif // U_HIDE_DRAFT_API
/**
* Access to the list of edits.
@ -189,9 +187,9 @@ public:
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
*
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
* to make it point to the corresponding edit.
* The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
* and `findDestinationIndex(int32_t, UErrorCode &)` methods.
* Calling any of these methods mutates the iterator to make it point to the corresponding edit.
*
* For more information, see the documentation for {@link Edits}.
*
@ -202,7 +200,7 @@ public:
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
* Default constructor, empty iterator.
* @draft ICU 60
* @stable ICU 60
*/
Iterator() :
array(nullptr), index(0), length(0),
@ -253,7 +251,6 @@ public:
return findIndex(i, TRUE, errorCode) == 0;
}
#ifndef U_HIDE_DRAFT_API
/**
* Moves the iterator to the edit that contains the destination index.
* The destination index may be found in a no-change edit
@ -271,7 +268,7 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the destination index was found
* @draft ICU 60
* @stable ICU 60
*/
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, FALSE, errorCode) == 0;
@ -297,7 +294,7 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return destination index; undefined if i is not 0..string length
* @draft ICU 60
* @stable ICU 60
*/
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
@ -321,10 +318,9 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return source index; undefined if i is not 0..string length
* @draft ICU 60
* @stable ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
/**
* Returns whether the edit currently represented by the iterator is a change edit.
@ -366,13 +362,13 @@ public:
/**
* The start index of the current span in the replacement string; the span has length
* {@link #newLength}. Well-defined only if the current edit is a change edit.
* <p>
* The <em>replacement string</em> is the concatenation of all substrings of the destination
*
* The *replacement string* is the concatenation of all substrings of the destination
* string corresponding to change edits.
* <p>
*
* This method is intended to be used together with operations that write only replacement
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
* in-place.
* characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
* The source string can then be modified in-place.
*
* @return the current index into the replacement-characters-only string,
* not counting unchanged spans
@ -475,7 +471,6 @@ public:
return Iterator(array, length, FALSE, FALSE);
}
#ifndef U_HIDE_DRAFT_API
/**
* Merges the two input Edits and appends the result to this object.
*
@ -501,10 +496,9 @@ public:
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return *this, with the merged edits appended
* @draft ICU 60
* @stable ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
#endif // U_HIDE_DRAFT_API
private:
void releaseArray() U_NOEXCEPT;

Просмотреть файл

@ -28,6 +28,7 @@ U_NAMESPACE_BEGIN
* enum bitset for boolean fields. Similar to Java EnumSet<>.
* Needs to range check. Used for private instance variables.
* @internal
* \cond
*/
template<typename T, uint32_t minValue, uint32_t limitValue>
class EnumSet {
@ -60,6 +61,8 @@ private:
uint32_t fBools;
};
/** \endcond */
U_NAMESPACE_END
#endif /* U_SHOW_CPLUSPLUS_API */

Просмотреть файл

@ -67,16 +67,14 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Construct an empty FilteredBreakIteratorBuilder.
* In this state, it will not suppress any segment boundaries.
* @param status The error code.
* @return the new builder
* @draft ICU 60
* @stable ICU 60
*/
static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
#endif /* U_HIDE_DRAFT_API */
/**
* Suppress a certain string from being the end of a segment.
@ -95,7 +93,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* This function does not create any new segment boundaries, but only serves to un-do
* the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
* locale data which may be suppressing certain strings.
* @param exception the exception to remove
* @param string the exception to remove
* @param status error code
* @return returns TRUE if the string was present and now removed,
* FALSE if the call was a no-op because the string was not being suppressed.
@ -114,7 +112,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
#ifndef U_HIDE_DRAFT_API
/**
* Wrap (adopt) an existing break iterator in a new filtered instance.
* The resulting BreakIterator is owned by the caller.
@ -126,12 +123,11 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
* @draft ICU 60
* @stable ICU 60
*/
inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
return build(adoptBreakIterator, status);
}
#endif /* U_HIDE_DRAFT_API */
protected:
/**

Просмотреть файл

@ -110,7 +110,7 @@
#include "unicode/utypes.h"
#if UCONFIG_ENABLE_PLUGINS
#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)

Просмотреть файл

@ -31,6 +31,10 @@
#ifndef LOCID_H
#define LOCID_H
#include "unicode/bytestream.h"
#include "unicode/localpointer.h"
#include "unicode/strenum.h"
#include "unicode/stringpiece.h"
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/putil.h"
@ -280,6 +284,16 @@ public:
*/
Locale(const Locale& other);
#ifndef U_HIDE_DRAFT_API
/**
* Move constructor; might leave source in bogus state.
* This locale will have the same contents that the source locale had.
*
* @param other The Locale object being moved in.
* @draft ICU 63
*/
Locale(Locale&& other) U_NOEXCEPT;
#endif // U_HIDE_DRAFT_API
/**
* Destructor
@ -296,6 +310,19 @@ public:
*/
Locale& operator=(const Locale& other);
#ifndef U_HIDE_DRAFT_API
/**
* Move assignment operator; might leave source in bogus state.
* This locale will have the same contents that the source locale had.
* The behavior is undefined if *this and the source are the same object.
*
* @param other The Locale object being moved in.
* @return *this
* @draft ICU 63
*/
Locale& operator=(Locale&& other) U_NOEXCEPT;
#endif // U_HIDE_DRAFT_API
/**
* Checks if two locale keys are the same.
*
@ -362,6 +389,55 @@ public:
UErrorCode& success);
#endif /* U_HIDE_SYSTEM_API */
#ifndef U_HIDE_DRAFT_API
/**
* Returns a Locale for the specified BCP47 language tag string.
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
* <p>
* This implements the 'Language-Tag' production of BCP47, and so
* supports grandfathered (regular and irregular) as well as private
* use language tags. Private use tags are represented as 'x-whatever',
* and grandfathered tags are converted to their canonical replacements
* where they exist. Note that a few grandfathered tags have no modern
* replacement, these will be converted using the fallback described in
* the first paragraph, so some information might be lost.
* @param tag the input BCP47 language tag.
* @param status error information if creating the Locale failed.
* @return the Locale for the specified BCP47 language tag.
* @draft ICU 63
*/
static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
/**
* Returns a well-formed language tag for this Locale.
* <p>
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
* requirement will be silently omitted from the result.
*
* If this function fails, partial output may have been written to the sink.
*
* @param sink the output sink receiving the BCP47 language
* tag for this Locale.
* @param status error information if creating the language tag failed.
* @draft ICU 63
*/
void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
/**
* Returns a well-formed language tag for this Locale.
* <p>
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
* requirement will be silently omitted from the result.
*
* @param status error information if creating the language tag failed.
* @return the BCP47 language tag for this Locale.
* @draft ICU 63
*/
template<typename StringClass>
inline StringClass toLanguageTag(UErrorCode& status) const;
#endif // U_HIDE_DRAFT_API
/**
* Creates a locale which has had minimal canonicalization
* as per uloc_getName().
@ -432,6 +508,69 @@ public:
*/
const char * getBaseName() const;
#ifndef U_HIDE_DRAFT_API
/**
* Add the likely subtags for this Locale, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If this Locale is already in the maximal form, or not valid, or there is
* no data available for maximization, the Locale will be unchanged.
*
* For example, "und-Zzzz" cannot be maximized, since there is no
* reasonable maximization.
*
* Examples:
*
* "en" maximizes to "en_Latn_US"
*
* "de" maximizes to "de_Latn_DE"
*
* "sr" maximizes to "sr_Cyrl_RS"
*
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
*
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
*
* @param status error information if maximizing this Locale failed.
* If this Locale is not well-formed, the error code is
* U_ILLEGAL_ARGUMENT_ERROR.
* @draft ICU 63
*/
void addLikelySubtags(UErrorCode& status);
/**
* Minimize the subtags for this Locale, per the algorithm described
* in the following CLDR technical report:
*
* http://www.unicode.org/reports/tr35/#Likely_Subtags
*
* If this Locale is already in the minimal form, or not valid, or there is
* no data available for minimization, the Locale will be unchanged.
*
* Since the minimization algorithm relies on proper maximization, see the
* comments for addLikelySubtags for reasons why there might not be any
* data.
*
* Examples:
*
* "en_Latn_US" minimizes to "en"
*
* "de_Latn_DE" minimizes to "de"
*
* "sr_Cyrl_RS" minimizes to "sr"
*
* "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
* script, and minimizing to "zh" would imply "zh_Hans_CN".)
*
* @param status error information if minimizing this Locale failed.
* If this Locale is not well-formed, the error code is
* U_ILLEGAL_ARGUMENT_ERROR.
* @draft ICU 63
*/
void minimizeSubtags(UErrorCode& status);
#endif // U_HIDE_DRAFT_API
/**
* Gets the list of keywords for the specified locale.
@ -439,13 +578,62 @@ public:
* @param status the status code
* @return pointer to StringEnumeration class, or NULL if there are no keywords.
* Client must dispose of it by calling delete.
* @see getKeywords
* @stable ICU 2.8
*/
StringEnumeration * createKeywords(UErrorCode &status) const;
#ifndef U_HIDE_DRAFT_API
/**
* Gets the list of Unicode keywords for the specified locale.
*
* @param status the status code
* @return pointer to StringEnumeration class, or NULL if there are no keywords.
* Client must dispose of it by calling delete.
* @see getUnicodeKeywords
* @draft ICU 63
*/
StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
/**
* Gets the set of keywords for this Locale.
*
* A wrapper to call createKeywords() and write the resulting
* keywords as standard strings (or compatible objects) into any kind of
* container that can be written to by an STL style output iterator.
*
* @param iterator an STL style output iterator to write the keywords to.
* @param status error information if creating set of keywords failed.
* @draft ICU 63
*/
template<typename StringClass, typename OutputIterator>
inline void getKeywords(OutputIterator iterator, UErrorCode& status) const;
/**
* Gets the set of Unicode keywords for this Locale.
*
* A wrapper to call createUnicodeKeywords() and write the resulting
* keywords as standard strings (or compatible objects) into any kind of
* container that can be written to by an STL style output iterator.
*
* @param iterator an STL style output iterator to write the keywords to.
* @param status error information if creating set of keywords failed.
* @draft ICU 63
*/
template<typename StringClass, typename OutputIterator>
inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const;
#endif // U_HIDE_DRAFT_API
/**
* Gets the value for a keyword.
*
* This uses legacy keyword=value pairs, like "collation=phonebook".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword for which we want the value. Case insensitive.
* @param buffer The buffer to receive the keyword value.
* @param bufferCapacity The capacity of receiving buffer
@ -456,12 +644,81 @@ public:
*/
int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
#ifndef U_HIDE_DRAFT_API
/**
* Gets the value for a keyword.
*
* This uses legacy keyword=value pairs, like "collation=phonebook".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword for which we want the value.
* @param sink the sink to receive the keyword value.
* @param status error information if getting the value failed.
* @draft ICU 63
*/
void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
/**
* Gets the value for a keyword.
*
* This uses legacy keyword=value pairs, like "collation=phonebook".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword for which we want the value.
* @param status error information if getting the value failed.
* @return the keyword value.
* @draft ICU 63
*/
template<typename StringClass>
inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const;
/**
* Gets the Unicode value for a Unicode keyword.
*
* This uses Unicode key-value pairs, like "co-phonebk".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword for which we want the value.
* @param sink the sink to receive the keyword value.
* @param status error information if getting the value failed.
* @draft ICU 63
*/
void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
/**
* Gets the Unicode value for a Unicode keyword.
*
* This uses Unicode key-value pairs, like "co-phonebk".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword for which we want the value.
* @param status error information if getting the value failed.
* @return the keyword value.
* @draft ICU 63
*/
template<typename StringClass>
inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const;
#endif // U_HIDE_DRAFT_API
/**
* Sets or removes the value for a keyword.
*
* For removing all keywords, use getBaseName(),
* and construct a new Locale if it differs from getName().
*
* This uses legacy keyword=value pairs, like "collation=phonebook".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword to be set. Case insensitive.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
@ -472,6 +729,48 @@ public:
*/
void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
#ifndef U_HIDE_DRAFT_API
/**
* Sets or removes the value for a keyword.
*
* For removing all keywords, use getBaseName(),
* and construct a new Locale if it differs from getName().
*
* This uses legacy keyword=value pairs, like "collation=phonebook".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword to be set.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
* that keyword does not exist.
* @param status Returns any error information while performing this operation.
* @draft ICU 63
*/
void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
/**
* Sets or removes the Unicode value for a Unicode keyword.
*
* For removing all keywords, use getBaseName(),
* and construct a new Locale if it differs from getName().
*
* This uses Unicode key-value pairs, like "co-phonebk".
*
* ICU4C doesn't do automatic conversion between legacy and Unicode
* keywords and values in getters and setters (as opposed to ICU4J).
*
* @param keywordName name of the keyword to be set.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
* that keyword does not exist.
* @param status Returns any error information while performing this operation.
* @draft ICU 63
*/
void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
#endif // U_HIDE_DRAFT_API
/**
* returns the locale's three-letter language code, as specified
* in ISO draft standard ISO-639-2.
@ -759,12 +1058,12 @@ private:
/**
* A friend to allow the default locale to be set by either the C or C++ API.
* @internal
* @internal (private)
*/
friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
/**
* @internal
* @internal (private)
*/
friend void U_CALLCONV locale_available_init();
};
@ -775,6 +1074,17 @@ Locale::operator!=(const Locale& other) const
return !operator==(other);
}
#ifndef U_HIDE_DRAFT_API
/*
 * Convenience overload: collects the language tag into a freshly constructed
 * StringClass by routing the ByteSink overload of toLanguageTag() through a
 * StringByteSink that appends to that string. On failure `status` is set by
 * the ByteSink overload and whatever was appended so far is returned.
 */
template<typename StringClass> inline StringClass
Locale::toLanguageTag(UErrorCode& status) const
{
    StringClass tag;
    StringByteSink<StringClass> tagSink(&tag);
    toLanguageTag(tagSink, status);
    return tag;
}
#endif // U_HIDE_DRAFT_API
inline const char *
Locale::getCountry() const
{
@ -805,6 +1115,62 @@ Locale::getName() const
return fullName;
}
#ifndef U_HIDE_DRAFT_API
/*
 * Writes each keyword of this Locale to `iterator` as a StringClass built from
 * the (pointer, length) pair returned by the keyword enumeration.
 *
 * @param iterator an STL style output iterator that receives the keywords.
 * @param status   in/out error code; nothing is written if creating the
 *                 enumeration fails, and iteration stops at the first failure.
 */
template<typename StringClass, typename OutputIterator> inline void
Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
{
    LocalPointer<StringEnumeration> keys(createKeywords(status));
    // createKeywords() is documented to return NULL (without setting an error)
    // when the locale has no keywords, so a successful status alone does not
    // guarantee that `keys` can be dereferenced.
    if (U_FAILURE(status) || keys.isNull()) {
        return;
    }
    for (;;) {
        int32_t resultLength;
        const char* buffer = keys->next(&resultLength, status);
        // A null buffer marks the end of the enumeration.
        if (U_FAILURE(status) || buffer == nullptr) {
            return;
        }
        *iterator++ = StringClass(buffer, resultLength);
    }
}
/*
 * Writes each Unicode keyword of this Locale to `iterator` as a StringClass
 * built from the (pointer, length) pair returned by the keyword enumeration.
 *
 * @param iterator an STL style output iterator that receives the keywords.
 * @param status   in/out error code; nothing is written if creating the
 *                 enumeration fails, and iteration stops at the first failure.
 */
template<typename StringClass, typename OutputIterator> inline void
Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
{
    LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status));
    // createUnicodeKeywords() is documented to return NULL (without setting an
    // error) when the locale has no keywords, so a successful status alone
    // does not guarantee that `keys` can be dereferenced.
    if (U_FAILURE(status) || keys.isNull()) {
        return;
    }
    for (;;) {
        int32_t resultLength;
        const char* buffer = keys->next(&resultLength, status);
        // A null buffer marks the end of the enumeration.
        if (U_FAILURE(status) || buffer == nullptr) {
            return;
        }
        *iterator++ = StringClass(buffer, resultLength);
    }
}
/*
 * Convenience overload: returns the value of the legacy keyword `keywordName`
 * as a StringClass, by forwarding to the ByteSink overload of
 * getKeywordValue() with a StringByteSink that appends to the returned string.
 * Errors are reported through `status` by the ByteSink overload.
 */
template<typename StringClass> inline StringClass
Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
{
    StringClass value;
    StringByteSink<StringClass> valueSink(&value);
    getKeywordValue(keywordName, valueSink, status);
    return value;
}
/*
 * Convenience overload: returns the Unicode value of the Unicode keyword
 * `keywordName` as a StringClass, by forwarding to the ByteSink overload of
 * getUnicodeKeywordValue() with a StringByteSink that appends to the returned
 * string. Errors are reported through `status` by the ByteSink overload.
 */
template<typename StringClass> inline StringClass
Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const
{
    StringClass value;
    StringByteSink<StringClass> valueSink(&value);
    getUnicodeKeywordValue(keywordName, valueSink, status);
    return value;
}
#endif // U_HIDE_DRAFT_API
inline UBool
Locale::isBogus(void) const {
return fIsBogus;

Просмотреть файл

@ -771,8 +771,8 @@ public:
* @stable ICU 4.8
*/
UMessagePatternArgType getArgType() const {
UMessagePatternPartType type=getType();
if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
UMessagePatternPartType msgType=getType();
if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
return (UMessagePatternArgType)value;
} else {
return UMSGPAT_ARG_TYPE_NONE;

Просмотреть файл

@ -241,7 +241,7 @@ public:
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
* @stable ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
@ -391,7 +391,7 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
* @stable ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
@ -559,7 +559,7 @@ public:
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
* @stable ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
@ -686,7 +686,7 @@ public:
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
* @stable ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;

Просмотреть файл

@ -38,7 +38,7 @@
* and/or from other macros that are predefined by the compiler
* or defined in standard (POSIX or platform or compiler) headers.
*
* As a temporary workaround, you can add an explicit <code>#define</code> for some macros
* As a temporary workaround, you can add an explicit \#define for some macros
* before it is first tested, or add an equivalent -D macro definition
* to the compiler's command line.
*
@ -207,6 +207,9 @@
# define CYGWINMSVC
#endif
*/
#ifdef U_IN_DOXYGEN
# define CYGWINMSVC
#endif
/**
* \def U_PLATFORM_USES_ONLY_WIN32_API
@ -417,6 +420,9 @@
#ifndef __has_cpp_attribute
# define __has_cpp_attribute(x) 0
#endif
#ifndef __has_declspec_attribute
# define __has_declspec_attribute(x) 0
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
@ -493,13 +499,8 @@ namespace std {
*/
#ifdef U_NOEXCEPT
/* Use the predefined value. */
#elif defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS /* Visual Studio */
# define U_NOEXCEPT
#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_noexcept) || __has_extension(cxx_noexcept) \
|| (defined(_MSC_VER) && _MSC_VER >= 1900) /* Visual Studio 2015 */
# define U_NOEXCEPT noexcept
#else
# define U_NOEXCEPT
# define U_NOEXCEPT noexcept
#endif
/**
@ -519,6 +520,8 @@ namespace std {
(__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough"))
# define U_FALLTHROUGH [[clang::fallthrough]]
# endif
#elif defined(__GNUC__) && (__GNUC__ >= 7)
# define U_FALLTHROUGH __attribute__((fallthrough))
#endif
#ifndef U_FALLTHROUGH
@ -763,7 +766,8 @@ namespace std {
#elif U_HAVE_CHAR16_T \
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|| (defined(__HP_cc) && __HP_cc >= 111106)
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|| (defined(U_IN_DOXYGEN))
# define U_DECLARE_UTF16(string) u ## string
#elif U_SIZEOF_WCHAR_T == 2 \
&& (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
@ -782,6 +786,8 @@ namespace std {
/* Use the predefined value. */
#elif defined(U_STATIC_IMPLEMENTATION)
# define U_EXPORT
#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport))
# define U_EXPORT __declspec(dllexport)
#elif defined(__GNUC__)
# define U_EXPORT __attribute__((visibility("default")))
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
@ -789,8 +795,6 @@ namespace std {
# define U_EXPORT __global
/*#elif defined(__HP_aCC) || defined(__HP_cc)
# define U_EXPORT __declspec(dllexport)*/
#elif defined(_MSC_VER)
# define U_EXPORT __declspec(dllexport)
#else
# define U_EXPORT
#endif
@ -806,7 +810,7 @@ namespace std {
#ifdef U_IMPORT
/* Use the predefined value. */
#elif defined(_MSC_VER)
#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport))
/* Windows needs to export/import data. */
# define U_IMPORT __declspec(dllimport)
#else

Просмотреть файл

@ -83,6 +83,7 @@ typedef unsigned char uint8_t;
#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
/// \cond
#if ! U_HAVE_INT8_T
typedef signed char int8_t;
#endif
@ -122,6 +123,7 @@ typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
#endif
#endif
/// \endcond
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */

Просмотреть файл

@ -99,7 +99,7 @@ private:
* If present, UStack of LanguageBreakEngine objects that might handle
* dictionary characters. Searched from top to bottom to find an object to
* handle a given character.
* @internal
* @internal (private)
*/
UStack *fLanguageBreakEngines;
@ -108,14 +108,14 @@ private:
* If present, the special LanguageBreakEngine used for handling
* characters that are in the dictionary set, but not handled by any
* LanguageBreakEngine.
* @internal
* @internal (private)
*/
UnhandledEngine *fUnhandledBreakEngine;
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
* @internal
* @internal (private)
*/
uint32_t fDictionaryCharCount;
@ -150,7 +150,7 @@ private:
*
* The break iterator adopts the memory, and will
* free it when done.
* @internal
* @internal (private)
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);

Просмотреть файл

@ -39,8 +39,6 @@
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
#ifndef U_HIDE_DRAFT_API
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
@ -50,7 +48,7 @@
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
* @stable ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
@ -63,12 +61,10 @@
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
* @stable ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
#endif // U_HIDE_DRAFT_API
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
@ -112,8 +108,6 @@
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
#ifndef U_HIDE_DRAFT_API
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
@ -130,7 +124,7 @@
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @draft ICU 60
* @stable ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
@ -141,7 +135,7 @@
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
* @stable ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
@ -153,12 +147,10 @@
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
* @stable ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
#endif // U_HIDE_DRAFT_API
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.

Просмотреть файл

@ -26,8 +26,10 @@
*/
// Forward declaration.
/// \cond
struct UHashtable;
typedef struct UHashtable UHashtable;
/// \endcond
/**
* Build options for BytesTrieBuilder and CharsTrieBuilder.
@ -64,7 +66,7 @@ class U_COMMON_API StringTrieBuilder : public UObject {
public:
#ifndef U_HIDE_INTERNAL_API
/** @internal */
static UBool hashNode(const void *node);
static int32_t hashNode(const void *node);
/** @internal */
static UBool equalNodes(const void *left, const void *right);
#endif /* U_HIDE_INTERNAL_API */
@ -188,7 +190,10 @@ protected:
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
// it is needed for layout of other objects.
/** @internal */
/**
* @internal
* \cond
*/
class Node : public UObject {
public:
Node(int32_t initialHash) : hash(initialHash), offset(0) {}
@ -391,7 +396,9 @@ protected:
int32_t length;
Node *next; // A branch sub-node.
};
#endif /* U_HIDE_INTERNAL_API */
/// \endcond
/** @internal */
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,

Просмотреть файл

@ -323,6 +323,10 @@
* these special values are designed that way. Also, the implementation
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
*
* Note: The numeric values of the related constants will not change:
* They are tied to the use of 7-bit byte values (plus the override bit)
* and of the UBiDiLevel=uint8_t data type in this API.
*
* @see UBIDI_DEFAULT_LTR
* @see UBIDI_DEFAULT_RTL
* @see UBIDI_LEVEL_OVERRIDE
@ -386,6 +390,8 @@ typedef uint8_t UBiDiLevel;
/**
* Maximum explicit embedding level.
* Same as the max_depth value in the
* <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
* @stable ICU 2.0
*/
@ -1996,7 +2002,7 @@ U_CDECL_BEGIN
*
* @return The directional property / Bidi class for the given code point
* <code>c</code> if the default class has been overridden, or
* <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
* <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
* if the standard Bidi class value for <code>c</code> is to be used.
* @see ubidi_setClassCallback
* @see ubidi_getClassCallback
@ -2010,7 +2016,7 @@ U_CDECL_END
/**
* Retrieve the Bidi class for a given code point.
* <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
* value other than <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
* value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
* that value is used; otherwise the default class determination mechanism is invoked.</p>
*
* @param pBiDi is the paragraph <code>UBiDi</code> object.

Просмотреть файл

@ -26,33 +26,38 @@
/**
* \file
* \brief Bidi Transformations
*/
/**
* `UBiDiOrder` indicates the order of text.
*
* <code>UBiDiOrder</code> indicates the order of text.<p>
* This bidi transformation engine supports all possible combinations (4 in
* total) of input and output text order:
* <ul>
* <li><logical input, visual output>: unless the output direction is RTL, this
* corresponds to a normal operation of the Bidi algorithm as described in the
* Unicode Technical Report and implemented by <code>UBiDi</code> when the
* reordering mode is set to <code>UBIDI_REORDER_DEFAULT</code>. Visual RTL
* mode is not supported by <code>UBiDi</code> and is accomplished through
* reversing a visual LTR string,</li>
* <li><visual input, logical output>: unless the input direction is RTL, this
* corresponds to an "inverse bidi algorithm" in <code>UBiDi</code> with the
* reordering mode set to <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>.
* Visual RTL mode is not supported by <code>UBiDi</code> and is
* accomplished through reversing a visual LTR string,</li>
* <li><logical input, logical output>: if the input and output base directions
* mismatch, this corresponds to the <code>UBiDi</code> implementation with the
* reordering mode set to <code>UBIDI_REORDER_RUNS_ONLY</code>; and if the
* input and output base directions are identical, the transformation engine
* will only handle character mirroring and Arabic shaping operations without
* reordering,</li>
* <li><visual input, visual output>: this reordering mode is not supported by
* the <code>UBiDi</code> engine; it implies character mirroring, Arabic
* shaping, and - if the input/output base directions mismatch - string
* reverse operations.</li>
* </ul>
*
* - <logical input, visual output>: unless the output direction is RTL, this
* corresponds to a normal operation of the Bidi algorithm as described in the
* Unicode Technical Report and implemented by `UBiDi` when the
* reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
* mode is not supported by `UBiDi` and is accomplished through
* reversing a visual LTR string,
*
* - <visual input, logical output>: unless the input direction is RTL, this
* corresponds to an "inverse bidi algorithm" in `UBiDi` with the
* reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
* Visual RTL mode is not supported by `UBiDi` and is
* accomplished through reversing a visual LTR string,
*
* - <logical input, logical output>: if the input and output base directions
* mismatch, this corresponds to the `UBiDi` implementation with the
* reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
* input and output base directions are identical, the transformation engine
* will only handle character mirroring and Arabic shaping operations without
* reordering,
*
* - <visual input, visual output>: this reordering mode is not supported by
* the `UBiDi` engine; it implies character mirroring, Arabic
* shaping, and - if the input/output base directions mismatch - string
* reverse operations.
* @see ubidi_setInverse
* @see ubidi_setReorderingMode
* @see UBIDI_REORDER_DEFAULT

Просмотреть файл

@ -27,6 +27,24 @@
#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
#include "unicode/ucpmap.h"
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
#define USET_DEFINED
/**
* USet is the C API type corresponding to C++ class UnicodeSet.
* It is forward-declared here to avoid including unicode/uset.h file if related
* APIs are not used.
*
* @see ucnv_getUnicodeSet
* @stable ICU 2.4
*/
typedef struct USet USet;
#endif
U_CDECL_BEGIN
@ -61,6 +79,18 @@ U_CDECL_BEGIN
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
*
* Many properties are accessible via generic functions that take a UProperty selector.
* - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point.
* - u_getIntPropertyValue() returns an integer value per property and code point.
* For each supported enumerated or catalog property, there is
* an enum type for all of the property's values, and
* u_getIntPropertyValue() returns the numeric values of those constants.
* - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
* all code points for which the property is true.
* - u_getIntPropertyMap() returns a map for each
* ICU-supported enumerated/catalog/int-valued property which
* maps all Unicode code points to their values for that property.
*
* Many functions are designed to match java.lang.Character functions.
* See the individual function documentation,
* and see the JDK 1.4 java.lang.Character documentation
@ -546,12 +576,34 @@ typedef enum UProperty {
(http://www.unicode.org/reports/tr9/)
Returns UBidiPairedBracketType values. @stable ICU 52 */
UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/**
* Enumerated property Indic_Positional_Category.
* New in Unicode 6.0 as provisional property Indic_Matra_Category;
* renamed and changed to informative in Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
/**
* Enumerated property Indic_Syllabic_Category.
* New in Unicode 6.0 as provisional; informative since Unicode 8.0.
* See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
* @stable ICU 63
*/
UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
/**
* Enumerated property Vertical_Orientation.
* Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
* New as a UCD property in Unicode 10.0.
* @stable ICU 63
*/
UCHAR_VERTICAL_ORIENTATION=0x1018,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for enumerated/integer Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_INT_LIMIT=0x1016,
UCHAR_INT_LIMIT=0x1019,
#endif // U_HIDE_DEPRECATED_API
/** Bitmask property General_Category_Mask.
@ -2320,6 +2372,161 @@ typedef enum UHangulSyllableType {
#endif // U_HIDE_DEPRECATED_API
} UHangulSyllableType;
/**
* Indic Positional Category constants.
*
* @see UCHAR_INDIC_POSITIONAL_CATEGORY
* @stable ICU 63
*/
typedef enum UIndicPositionalCategory {
/*
* Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
* It matches lines like
* U_INPC_<Unicode Indic_Positional_Category value name>
*/
/** @stable ICU 63 */
U_INPC_NA,
/** @stable ICU 63 */
U_INPC_BOTTOM,
/** @stable ICU 63 */
U_INPC_BOTTOM_AND_LEFT,
/** @stable ICU 63 */
U_INPC_BOTTOM_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_LEFT,
/** @stable ICU 63 */
U_INPC_LEFT_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_OVERSTRUCK,
/** @stable ICU 63 */
U_INPC_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP,
/** @stable ICU 63 */
U_INPC_TOP_AND_BOTTOM,
/** @stable ICU 63 */
U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP_AND_LEFT,
/** @stable ICU 63 */
U_INPC_TOP_AND_LEFT_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_TOP_AND_RIGHT,
/** @stable ICU 63 */
U_INPC_VISUAL_ORDER_LEFT,
} UIndicPositionalCategory;
/**
* Indic Syllabic Category constants.
*
* @see UCHAR_INDIC_SYLLABIC_CATEGORY
* @stable ICU 63
*/
typedef enum UIndicSyllabicCategory {
/*
* Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
* It matches lines like
* U_INSC_<Unicode Indic_Syllabic_Category value name>
*/
/** @stable ICU 63 */
U_INSC_OTHER,
/** @stable ICU 63 */
U_INSC_AVAGRAHA,
/** @stable ICU 63 */
U_INSC_BINDU,
/** @stable ICU 63 */
U_INSC_BRAHMI_JOINING_NUMBER,
/** @stable ICU 63 */
U_INSC_CANTILLATION_MARK,
/** @stable ICU 63 */
U_INSC_CONSONANT,
/** @stable ICU 63 */
U_INSC_CONSONANT_DEAD,
/** @stable ICU 63 */
U_INSC_CONSONANT_FINAL,
/** @stable ICU 63 */
U_INSC_CONSONANT_HEAD_LETTER,
/** @stable ICU 63 */
U_INSC_CONSONANT_INITIAL_POSTFIXED,
/** @stable ICU 63 */
U_INSC_CONSONANT_KILLER,
/** @stable ICU 63 */
U_INSC_CONSONANT_MEDIAL,
/** @stable ICU 63 */
U_INSC_CONSONANT_PLACEHOLDER,
/** @stable ICU 63 */
U_INSC_CONSONANT_PRECEDING_REPHA,
/** @stable ICU 63 */
U_INSC_CONSONANT_PREFIXED,
/** @stable ICU 63 */
U_INSC_CONSONANT_SUBJOINED,
/** @stable ICU 63 */
U_INSC_CONSONANT_SUCCEEDING_REPHA,
/** @stable ICU 63 */
U_INSC_CONSONANT_WITH_STACKER,
/** @stable ICU 63 */
U_INSC_GEMINATION_MARK,
/** @stable ICU 63 */
U_INSC_INVISIBLE_STACKER,
/** @stable ICU 63 */
U_INSC_JOINER,
/** @stable ICU 63 */
U_INSC_MODIFYING_LETTER,
/** @stable ICU 63 */
U_INSC_NON_JOINER,
/** @stable ICU 63 */
U_INSC_NUKTA,
/** @stable ICU 63 */
U_INSC_NUMBER,
/** @stable ICU 63 */
U_INSC_NUMBER_JOINER,
/** @stable ICU 63 */
U_INSC_PURE_KILLER,
/** @stable ICU 63 */
U_INSC_REGISTER_SHIFTER,
/** @stable ICU 63 */
U_INSC_SYLLABLE_MODIFIER,
/** @stable ICU 63 */
U_INSC_TONE_LETTER,
/** @stable ICU 63 */
U_INSC_TONE_MARK,
/** @stable ICU 63 */
U_INSC_VIRAMA,
/** @stable ICU 63 */
U_INSC_VISARGA,
/** @stable ICU 63 */
U_INSC_VOWEL,
/** @stable ICU 63 */
U_INSC_VOWEL_DEPENDENT,
/** @stable ICU 63 */
U_INSC_VOWEL_INDEPENDENT,
} UIndicSyllabicCategory;
/**
* Vertical Orientation constants.
*
* @see UCHAR_VERTICAL_ORIENTATION
* @stable ICU 63
*/
typedef enum UVerticalOrientation {
/*
* Note: UVerticalOrientation constants are parsed by preparseucd.py.
* It matches lines like
* U_VO_<Unicode Vertical_Orientation value name>
*/
/** @stable ICU 63 */
U_VO_ROTATED,
/** @stable ICU 63 */
U_VO_TRANSFORMED_ROTATED,
/** @stable ICU 63 */
U_VO_TRANSFORMED_UPRIGHT,
/** @stable ICU 63 */
U_VO_UPRIGHT,
} UVerticalOrientation;
/**
* Check a binary Unicode property for a code point.
*
@ -2342,6 +2549,7 @@ typedef enum UHangulSyllableType {
* does not have data for the property at all, or not for this code point.
*
* @see UProperty
* @see u_getBinaryPropertySet
* @see u_getIntPropertyValue
* @see u_getUnicodeVersion
* @stable ICU 2.1
@ -2349,6 +2557,28 @@ typedef enum UHangulSyllableType {
U_STABLE UBool U_EXPORT2
u_hasBinaryProperty(UChar32 c, UProperty which);
#ifndef U_HIDE_DRAFT_API
/**
* Returns a frozen USet for a binary property.
* The library retains ownership over the returned object.
* Sets an error code if the property number is not one for a binary property.
*
* The returned set contains all code points for which the property is true.
*
* @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
* @param pErrorCode an in/out ICU UErrorCode
* @return the property as a set
* @see UProperty
* @see u_hasBinaryProperty
* @see UnicodeSet::fromUSet
* @draft ICU 63
*/
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
#endif // U_HIDE_DRAFT_API
/**
* Check if a code point has the Alphabetic Unicode property.
* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
@ -2449,6 +2679,7 @@ u_isUWhiteSpace(UChar32 c);
* @see u_hasBinaryProperty
* @see u_getIntPropertyMinValue
* @see u_getIntPropertyMaxValue
* @see u_getIntPropertyMap
* @see u_getUnicodeVersion
* @stable ICU 2.2
*/
@ -2505,6 +2736,28 @@ u_getIntPropertyMinValue(UProperty which);
U_STABLE int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which);
#ifndef U_HIDE_DRAFT_API
/**
* Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
* The library retains ownership over the returned object.
* Sets an error code if the property number is not one for an "int property".
*
* The returned object maps all Unicode code points to their values for that property.
* For documentation of the integer values see u_getIntPropertyValue().
*
* @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
* @param pErrorCode an in/out ICU UErrorCode
* @return the property as a map
* @see UProperty
* @see u_getIntPropertyValue
* @draft ICU 63
*/
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
#endif // U_HIDE_DRAFT_API
/**
* Get the numeric value for a Unicode code point as defined in the
* Unicode Character Database.

Просмотреть файл

@ -53,19 +53,18 @@
#include "unicode/uenum.h"
#include "unicode/localpointer.h"
#ifndef __USET_H__
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
#define USET_DEFINED
/**
* USet is the C API type for Unicode sets.
* It is forward-declared here to avoid including the header file if related
* USet is the C API type corresponding to C++ class UnicodeSet.
* It is forward-declared here to avoid including unicode/uset.h file if related
* conversion APIs are not used.
* See unicode/uset.h
*
* @see ucnv_getUnicodeSet
* @stable ICU 2.6
* @stable ICU 2.4
*/
struct USet;
/** @stable ICU 2.6 */
typedef struct USet USet;
#endif

Просмотреть файл

@ -183,7 +183,7 @@
*/
#ifdef U_HAVE_LIB_SUFFIX
/* Use the predefined value. */
#elif defined(U_LIB_SUFFIX_C_NAME)
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
# define U_HAVE_LIB_SUFFIX 1
#endif
@ -431,17 +431,6 @@
# define UCONFIG_HAVE_PARSEALLINPUT 1
#endif
/**
* \def UCONFIG_FORMAT_FASTPATHS_49
* This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols
*
* @internal
*/
#ifndef UCONFIG_FORMAT_FASTPATHS_49
# define UCONFIG_FORMAT_FASTPATHS_49 1
#endif
/**
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
* This switch turns off filtered break iteration code.

Просмотреть файл

@ -0,0 +1,162 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucpmap.h
// created: 2018sep03 Markus W. Scherer
#ifndef __UCPMAP_H__
#define __UCPMAP_H__
#include "unicode/utypes.h"
#ifndef U_HIDE_DRAFT_API
U_CDECL_BEGIN
/**
* \file
*
* This file defines an abstract map from Unicode code points to integer values.
*
* @see UCPMap
* @see UCPTrie
* @see UMutableCPTrie
*/
/**
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
*
* @see UCPTrie
* @see UMutableCPTrie
* @draft ICU 63
*/
typedef struct UCPMap UCPMap;
/**
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
* Most users should use UCPMAP_RANGE_NORMAL.
*
* @see ucpmap_getRange
* @see ucptrie_getRange
* @see umutablecptrie_getRange
* @draft ICU 63
*/
enum UCPMapRangeOption {
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
* Most users should use this option.
* @draft ICU 63
*/
UCPMAP_RANGE_NORMAL,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_LEAD(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the lead surrogate code *points*.
* @draft ICU 63
*/
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
/**
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
* except that all surrogates (U+D800..U+DFFF) are treated as having the
* surrogateValue, which is passed to getRange() as a separate parameter.
* The surrogateValue is not transformed via filter().
* See U_IS_SURROGATE(c).
*
* Most users should use UCPMAP_RANGE_NORMAL instead.
*
* This option is useful for maps that map surrogate code *units* to
* special values optimized for UTF-16 string processing
* or for special error behavior for unpaired surrogates,
* but those values are not to be associated with the surrogate code *points*.
* @draft ICU 63
*/
UCPMAP_RANGE_FIXED_ALL_SURROGATES
};
#ifndef U_IN_DOXYGEN
typedef enum UCPMapRangeOption UCPMapRangeOption;
#endif
/**
* Returns the value for a code point as stored in the map, with range checking.
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
*
* @param map the map
* @param c the code point
* @return the map value,
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
* @draft ICU 63
*/
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c);
/**
* Callback function type: Modifies a map value.
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
* The modified value will be returned by the getRange function.
*
* Can be used to ignore some of the value bits,
* make a filter for one of several values,
* return a value index computed from the map value, etc.
*
* @param context an opaque pointer, as passed into the getRange function
* @param value a value from the map
* @return the modified value
* @draft ICU 63
*/
typedef uint32_t U_CALLCONV
UCPMapValueFilter(const void *context, uint32_t value);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a map.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* If the UCPMapValueFilter function pointer is not NULL, then
* the value to be delivered is passed through that function, and the return value is the end
* of the range where all values are modified to the same actual value.
* The value is unchanged if that function pointer is NULL.
*
* Example:
* \code
* UChar32 start = 0, end;
* uint32_t value;
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
* NULL, NULL, &value)) >= 0) {
* // Work with the range start..end and its value.
* start = end + 1;
* }
* \endcode
*
* @param map the map
* @param start range start
* @param option defines whether surrogates are treated normally,
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
* @param filter a pointer to a function that may modify the map data value,
* or NULL if the values from the map are to be used unmodified
* @param context an opaque pointer that is passed on to the filter function
* @param pValue if not NULL, receives the value that every code point start..end has;
* may have been modified by filter(context, map value)
* if that function pointer is not NULL
* @return the range end code point, or -1 if start is not a valid code point
* @draft ICU 63
*/
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
UCPMapRangeOption option, uint32_t surrogateValue,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
U_CDECL_END
#endif // U_HIDE_DRAFT_API
#endif

Просмотреть файл

@ -0,0 +1,646 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// ucptrie.h (modified from utrie2.h)
// created: 2017dec29 Markus W. Scherer
#ifndef __UCPTRIE_H__
#define __UCPTRIE_H__
#include "unicode/utypes.h"
#ifndef U_HIDE_DRAFT_API
#include "unicode/localpointer.h"
#include "unicode/ucpmap.h"
#include "unicode/utf8.h"
U_CDECL_BEGIN
/**
* \file
*
* This file defines an immutable Unicode code point trie.
*
* @see UCPTrie
* @see UMutableCPTrie
*/
#ifndef U_IN_DOXYGEN
/**
* Union of const pointers of different element types:
* untyped (ptr0), uint16_t (ptr16), uint32_t (ptr32) and uint8_t (ptr8).
* It is the type of the `data` field of struct UCPTrie.
* NOTE(review): presumably the member to read is chosen by the trie's
* valueWidth (16/32/8 bits) - confirm against the implementation.
* @internal
*/
typedef union UCPTrieData {
/** Untyped pointer member. @internal */
const void *ptr0;
/** Pointer to 16-bit unsigned elements. @internal */
const uint16_t *ptr16;
/** Pointer to 32-bit unsigned elements. @internal */
const uint32_t *ptr32;
/** Pointer to 8-bit unsigned elements. @internal */
const uint8_t *ptr8;
} UCPTrieData;
#endif
/**
* Immutable Unicode code point trie structure.
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
* For details see http://site.icu-project.org/design/struct/utrie
*
* Do not access UCPTrie fields directly; use public functions and macros.
* Functions are easy to use: They support all trie types and value widths.
*
* When performance is really important, macros provide faster access.
* Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
* There are "fast" macros for special optimized use cases.
*
* The macros will return bogus values, or may crash, if used on the wrong type or value width.
*
* @see UMutableCPTrie
* @draft ICU 63
*/
struct UCPTrie {
#ifndef U_IN_DOXYGEN
/** @internal */
const uint16_t *index;
/** @internal */
UCPTrieData data;
/** @internal */
int32_t indexLength;
/** @internal */
int32_t dataLength;
/** Start of the last range which ends at U+10FFFF. @internal */
UChar32 highStart;
/** highStart>>12 @internal */
uint16_t shifted12HighStart;
/** @internal */
int8_t type; // UCPTrieType
/** @internal */
int8_t valueWidth; // UCPTrieValueWidth
/** padding/reserved @internal */
uint32_t reserved32;
/** padding/reserved @internal */
uint16_t reserved16;
/**
* Internal index-3 null block offset.
* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
* @internal
*/
uint16_t index3NullOffset;
/**
* Internal data null block offset, not shifted.
* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
* @internal
*/
int32_t dataNullOffset;
/** @internal */
uint32_t nullValue;
#ifdef UCPTRIE_DEBUG
/** @internal */
const char *name;
#endif
#endif
};
#ifndef U_IN_DOXYGEN
typedef struct UCPTrie UCPTrie;
#endif
/**
* Selectors for the type of a UCPTrie.
* Different trade-offs for size vs. speed.
*
* @see umutablecptrie_buildImmutable
* @see ucptrie_openFromBinary
* @see ucptrie_getType
* @draft ICU 63
*/
enum UCPTrieType {
/**
* For ucptrie_openFromBinary() to accept any type.
* ucptrie_getType() will return the actual type.
* @draft ICU 63
*/
UCPTRIE_TYPE_ANY = -1,
/**
* Fast/simple/larger BMP data structure. Use functions and "fast" macros.
* @draft ICU 63
*/
UCPTRIE_TYPE_FAST,
/**
* Small/slower BMP data structure. Use functions and "small" macros.
* @draft ICU 63
*/
UCPTRIE_TYPE_SMALL
};
#ifndef U_IN_DOXYGEN
typedef enum UCPTrieType UCPTrieType;
#endif
/**
* Selectors for the number of bits in a UCPTrie data value.
*
* @see umutablecptrie_buildImmutable
* @see ucptrie_openFromBinary
* @see ucptrie_getValueWidth
* @draft ICU 63
*/
enum UCPTrieValueWidth {
/**
* For ucptrie_openFromBinary() to accept any data value width.
* ucptrie_getValueWidth() will return the actual data value width.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_ANY = -1,
/**
* The trie stores 16 bits per data value.
* It returns them as unsigned values 0..0xffff=65535.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_16,
/**
* The trie stores 32 bits per data value.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_32,
/**
* The trie stores 8 bits per data value.
* It returns them as unsigned values 0..0xff=255.
* @draft ICU 63
*/
UCPTRIE_VALUE_BITS_8
};
#ifndef U_IN_DOXYGEN
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
#endif
/**
* Opens a trie from its binary form, stored in 32-bit-aligned memory.
* Inverse of ucptrie_toBinary().
*
* The memory must remain valid and unchanged as long as the trie is used.
* You must ucptrie_close() the trie once you are done using it.
*
* @param type selects the trie type; results in an
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
* use UCPTRIE_TYPE_ANY to accept any type
* @param valueWidth selects the number of bits in a data value; results in an
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
* use UCPTRIE_VALUE_BITS_ANY to accept any data value width
* @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
* @param length the number of bytes available at data;
* can be more than necessary
* @param pActualLength receives the actual number of bytes at data taken up by the trie data;
* can be NULL
* @param pErrorCode an in/out ICU UErrorCode
* @return the trie
*
* @see umutablecptrie_open
* @see umutablecptrie_buildImmutable
* @see ucptrie_toBinary
* @draft ICU 63
*/
U_CAPI UCPTrie * U_EXPORT2
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
const void *data, int32_t length, int32_t *pActualLength,
UErrorCode *pErrorCode);
/**
* Closes a trie and releases associated memory.
*
* @param trie the trie
* @draft ICU 63
*/
U_CAPI void U_EXPORT2
ucptrie_close(UCPTrie *trie);
/* C++-only convenience wrapper; compiled only when U_SHOW_CPLUSPLUS_API is nonzero. */
#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
/**
* \class LocalUCPTriePointer
* "Smart pointer" class, closes a UCPTrie via ucptrie_close().
* For most methods see the LocalPointerBase base class.
*
* @see LocalPointerBase
* @see LocalPointer
* @see ucptrie_close
* @draft ICU 63
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
U_NAMESPACE_END
#endif
/**
* Returns the trie type.
*
* @param trie the trie
* @return the trie type, as a UCPTrieType enum constant
* @see ucptrie_openFromBinary
* @see UCPTRIE_TYPE_ANY
* @see ucptrie_getValueWidth
* @draft ICU 63
*/
U_CAPI UCPTrieType U_EXPORT2
ucptrie_getType(const UCPTrie *trie);
/**
* Returns the data value width of a trie.
*
* The result is a UCPTrieValueWidth enum constant (for example UCPTRIE_VALUE_BITS_16),
* not a literal bit count.
*
* @param trie the trie
* @return the enum constant that identifies the number of bits in a trie data value
* @see ucptrie_openFromBinary
* @see UCPTRIE_VALUE_BITS_ANY
* @see ucptrie_getType
* @draft ICU 63
*/
U_CAPI UCPTrieValueWidth U_EXPORT2
ucptrie_getValueWidth(const UCPTrie *trie);
/**
* Returns the value for a code point as stored in the trie, with range checking.
* Returns the trie error value if c is not in the range 0..U+10FFFF.
*
* Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
* Easier to use because, unlike the macros, this function works on all UCPTrie
* objects, for all types and value widths.
*
* @param trie the trie
* @param c the code point
* @return the trie value,
* or the trie error value if the code point is not in the range 0..U+10FFFF
* @see UCPTRIE_FAST_GET
* @see UCPTRIE_SMALL_GET
* @draft ICU 63
*/
U_CAPI uint32_t U_EXPORT2
ucptrie_get(const UCPTrie *trie, UChar32 c);
/**
* Returns the last code point such that all those from start to there have the same value.
* Can be used to efficiently iterate over all same-value ranges in a trie.
* (This is normally faster than iterating over code points and get()ting each value,
* but much slower than a data structure that stores ranges directly.)
*
* If the UCPMapValueFilter function pointer is not NULL, then
* the value to be delivered is passed through that function, and the return value is the end
* of the range where all values are modified to the same actual value.
* The value is unchanged if that function pointer is NULL.
*
* Example:
* \code
* UChar32 start = 0, end;
* uint32_t value;
* while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
* NULL, NULL, &value)) >= 0) {
* // Work with the range start..end and its value.
* start = end + 1;
* }
* \endcode
*
* The example loop ends when start has advanced past the last valid code point,
* because the function then returns -1.
*
* @param trie the trie
* @param start range start
* @param option defines whether surrogates are treated normally,
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
* @param filter a pointer to a function that may modify the trie data value,
* or NULL if the values from the trie are to be used unmodified
* @param context an opaque pointer that is passed on to the filter function
* @param pValue if not NULL, receives the value that every code point start..end has;
* may have been modified by filter(context, trie value)
* if that function pointer is not NULL
* @return the range end code point, or -1 if start is not a valid code point
* @draft ICU 63
*/
U_CAPI UChar32 U_EXPORT2
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
UCPMapRangeOption option, uint32_t surrogateValue,
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
/**
* Writes a memory-mappable form of the trie into 32-bit-aligned memory.
* Inverse of ucptrie_openFromBinary().
*
* @param trie the trie
* @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
* can be NULL if capacity==0
* @param capacity the number of bytes available at data, or 0 for pure preflighting
* @param pErrorCode an in/out ICU UErrorCode;
* U_BUFFER_OVERFLOW_ERROR if the capacity is too small
* @return the number of bytes written or (if buffer overflow) needed for the trie
*
* @see ucptrie_openFromBinary()
* @draft ICU 63
*/
U_CAPI int32_t U_EXPORT2
ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
/**
* Macro parameter value for a trie with 16-bit data values.
* Use the name of this macro as a "dataAccess" parameter in other macros.
* Do not use this macro in any other way.
*
* Expands to element i of the trie's data.ptr16 array.
*
* @see UCPTRIE_VALUE_BITS_16
* @draft ICU 63
*/
#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
/**
* Macro parameter value for a trie with 32-bit data values.
* Use the name of this macro as a "dataAccess" parameter in other macros.
* Do not use this macro in any other way.
*
* Expands to element i of the trie's data.ptr32 array.
*
* @see UCPTRIE_VALUE_BITS_32
* @draft ICU 63
*/
#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
/**
* Macro parameter value for a trie with 8-bit data values.
* Use the name of this macro as a "dataAccess" parameter in other macros.
* Do not use this macro in any other way.
*
* Expands to element i of the trie's data.ptr8 array.
*
* @see UCPTRIE_VALUE_BITS_8
* @draft ICU 63
*/
#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
/**
* Returns a trie value for a code point, with range checking.
* Returns the trie error value if c is not in the range 0..U+10FFFF.
*
* Code points up to U+FFFF take the fast-index path of _UCPTRIE_CP_INDEX().
* The trie and c arguments are evaluated more than once, so they must not have side effects.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param c (UChar32, in) the input code point
* @return The code point's trie value.
* @draft ICU 63
*/
#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
/**
* Returns a trie value for a code point, with range checking.
* The width of the value is determined by the dataAccess argument.
* Returns the trie error value if c is not in the range U+0000..U+10FFFF.
*
* Code points up to UCPTRIE_SMALL_MAX take the fast-index path of _UCPTRIE_CP_INDEX().
* The trie and c arguments are evaluated more than once, so they must not have side effects.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param c (UChar32, in) the input code point
* @return The code point's trie value.
* @draft ICU 63
*/
#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
/**
* UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
* and gets a value from the trie.
* Sets the trie error value if c is an unpaired surrogate.
*
* Lookup paths:
* - a non-surrogate code unit uses _UCPTRIE_FAST_INDEX();
* - a lead surrogate followed by a trail surrogate is combined into a supplementary
*   code point, consumes both units, and uses _UCPTRIE_SMALL_INDEX();
* - any other surrogate consumes one unit and reads the data slot at
*   dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.
*
* The first code unit is read without comparing src against limit,
* so the caller must ensure that at least one unit is available.
* The macro expands to a brace-enclosed statement block, not an expression,
* and evaluates trie, src and c more than once.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param src (const UChar *, in/out) the source text pointer
* @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
* @param c (UChar32, out) variable for the code point
* @param result (out) variable for the trie lookup result
* @draft ICU 63
*/
#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \
(c) = *(src)++; \
int32_t __index; \
if (!U16_IS_SURROGATE(c)) { \
__index = _UCPTRIE_FAST_INDEX(trie, c); \
} else { \
uint16_t __c2; \
if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
++(src); \
(c) = U16_GET_SUPPLEMENTARY((c), __c2); \
__index = _UCPTRIE_SMALL_INDEX(trie, c); \
} else { \
__index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
} \
} \
(result) = dataAccess(trie, __index); \
}
/**
* UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
* and gets a value from the trie.
* Sets the trie error value if c is an unpaired surrogate.
*
* Lookup paths:
* - a non-surrogate code unit uses _UCPTRIE_FAST_INDEX();
* - a trail surrogate preceded by a lead surrogate is combined into a supplementary
*   code point, moves src back over both units, and uses _UCPTRIE_SMALL_INDEX();
* - any other surrogate moves src back one unit and reads the data slot at
*   dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.
*
* The first code unit is read without comparing src against start,
* so the caller must ensure that src is past start on entry.
* The macro expands to a brace-enclosed statement block, not an expression,
* and evaluates trie, src and c more than once.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param start (const UChar *, in) the start pointer for the text
* @param src (const UChar *, in/out) the source text pointer
* @param c (UChar32, out) variable for the code point
* @param result (out) variable for the trie lookup result
* @draft ICU 63
*/
#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \
(c) = *--(src); \
int32_t __index; \
if (!U16_IS_SURROGATE(c)) { \
__index = _UCPTRIE_FAST_INDEX(trie, c); \
} else { \
uint16_t __c2; \
if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
--(src); \
(c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
__index = _UCPTRIE_SMALL_INDEX(trie, c); \
} else { \
__index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
} \
} \
(result) = dataAccess(trie, __index); \
}
/**
* UTF-8: Post-increments src and gets a value from the trie.
* Sets the trie error value for an ill-formed byte sequence.
*
* Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
* because it would be more work to do so and is often not needed.
* If the trie value differs from the error value, then the byte sequence is well-formed,
* and the code point can be assembled without revalidation.
*
* A single-byte lead (U8_IS_SINGLE) is used directly as the data index.
* For multi-byte sequences the lead and trail bytes are validated inline;
* four-byte sequences below shifted12HighStart are resolved by
* ucptrie_internalSmallU8Index(), and those at or above it read the data slot at
* dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.
*
* On an ill-formed sequence, src is left after the lead byte and after
* any trail bytes that had already been accepted.
*
* The lead byte is read without comparing src against limit,
* so the caller must ensure that at least one byte is available.
* The macro expands to a brace-enclosed statement block, not an expression,
* and evaluates trie, src and limit more than once.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param src (const char *, in/out) the source text pointer
* @param limit (const char *, in) the limit pointer for the text (must not be NULL)
* @param result (out) variable for the trie lookup result
* @draft ICU 63
*/
#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \
int32_t __lead = (uint8_t)*(src)++; \
if (!U8_IS_SINGLE(__lead)) { \
uint8_t __t1, __t2, __t3; \
if ((src) != (limit) && \
(__lead >= 0xe0 ? \
__lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
(__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
: /* U+10000..U+10FFFF */ \
(__lead -= 0xf0) <= 4 && \
U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
(__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
(__t2 = *(src) - 0x80) <= 0x3f && \
++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
(__lead = __lead >= (trie)->shifted12HighStart ? \
(trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
: /* U+0080..U+07FF */ \
__lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
(__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
++(src); \
} else { \
__lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \
} \
} \
(result) = dataAccess(trie, __lead); \
}
/**
* UTF-8: Pre-decrements src and gets a value from the trie.
* Sets the trie error value for an ill-formed byte sequence.
*
* Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
* because it would be more work to do so and is often not needed.
* If the trie value differs from the error value, then the byte sequence is well-formed,
* and the code point can be assembled without revalidation.
*
* A single-byte unit (U8_IS_SINGLE) is used directly as the data index.
* Otherwise the work is delegated to ucptrie_internalU8PrevIndex(). The macro treats
* the low 3 bits of that function's result as the number of additional bytes
* to move src back, and the remaining bits (result >> 3) as the data index.
*
* The last byte is read without comparing src against start,
* so the caller must ensure that src is past start on entry.
* The macro expands to a brace-enclosed statement block, not an expression,
* and evaluates src more than once.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param start (const char *, in) the start pointer for the text
* @param src (const char *, in/out) the source text pointer
* @param result (out) variable for the trie lookup result
* @draft ICU 63
*/
#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \
int32_t __index = (uint8_t)*--(src); \
if (!U8_IS_SINGLE(__index)) { \
__index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
(const uint8_t *)(src)); \
(src) -= __index & 7; \
__index >>= 3; \
} \
(result) = dataAccess(trie, __index); \
}
/**
* Returns a trie value for an ASCII code point, without range checking.
* The code point c is used directly as the index into the data array.
*
* @param trie (const UCPTrie *, in) the trie (of either fast or small type)
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param c (UChar32, in) the input code point; must be U+0000..U+007F
* @return The ASCII code point's trie value.
* @draft ICU 63
*/
#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
/**
* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
* Can be used to look up a value for a UTF-16 code unit if other parts of
* the string processing check for surrogates.
*
* The data index is computed with _UCPTRIE_FAST_INDEX(), which evaluates c twice.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
* @return The BMP code point's trie value.
* @draft ICU 63
*/
#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
/**
* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
* without range checking.
*
* The data index is computed with _UCPTRIE_SMALL_INDEX(), which may evaluate c twice.
*
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie's value width
* @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
* @return The supplementary code point's trie value.
* @draft ICU 63
*/
#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
/* Internal definitions ----------------------------------------------------- */
#ifndef U_IN_DOXYGEN
/**
* Internal implementation constants.
* These are needed for the API macros, but users should not use these directly.
* @internal
*/
enum {
/**
* Right-shift applied to a code point to select its entry in the trie index
* (see _UCPTRIE_FAST_INDEX).
* @internal
*/
UCPTRIE_FAST_SHIFT = 6,
/** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,
/** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,
/**
* Highest code point that UCPTRIE_SMALL_GET looks up via the fast-index path;
* passed as the fastMax argument of _UCPTRIE_CP_INDEX.
* @internal
*/
UCPTRIE_SMALL_MAX = 0xfff,
/**
* Offset from dataLength (to be subtracted) for fetching the
* value returned for out-of-range code points and ill-formed UTF-8/16.
* @internal
*/
UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
/**
* Offset from dataLength (to be subtracted) for fetching the
* value returned for code points highStart..U+10FFFF.
* @internal
*/
UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
};
/* Internal functions and macros -------------------------------------------- */
// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
/**
* Internal function used by _UCPTRIE_SMALL_INDEX() for code points below trie->highStart.
* The macro uses the result as an index into the trie data array.
* Do not call directly.
* @internal
*/
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);
/**
* Internal function used by UCPTRIE_FAST_U8_NEXT() for four-byte UTF-8 sequences
* whose combined lead/first-trail bits are below trie->shifted12HighStart.
* The macro uses the result as an index into the trie data array.
* Do not call directly.
*
* As called from the macro: lt1 holds the lead byte bits (lead - 0xf0) shifted left by 6,
* combined with the low 6 bits of the first trail byte; t2 and t3 are the second and
* third trail bytes with 0x80 already subtracted.
* @internal
*/
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
/**
* Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation.
* Do not call directly.
*
* The macro passes the already-read last byte as c, and interprets the result as
* (data index << 3) | (number of additional bytes to move src back).
* @internal
*/
U_INTERNAL int32_t U_EXPORT2
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
const uint8_t *start, const uint8_t *src);
/**
* Internal trie getter for a code point below the fast limit. Returns the data index.
* Adds the in-block offset (low UCPTRIE_FAST_SHIFT bits of c) to the block start
* read from trie->index. No range checking; c is evaluated twice.
* @internal
*/
#define _UCPTRIE_FAST_INDEX(trie, c) \
((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
/**
* Internal trie getter for a code point at or above the fast limit. Returns the data index.
* Code points at or above trie->highStart share the single data slot at
* dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; all others are resolved by
* ucptrie_internalSmallIndex(). No range checking; trie and c may be evaluated twice.
* @internal
*/
#define _UCPTRIE_SMALL_INDEX(trie, c) \
((c) >= (trie)->highStart ? \
(trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
ucptrie_internalSmallIndex(trie, c))
/**
* Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
* Returns the data index.
*
* Code points up to fastMax use _UCPTRIE_FAST_INDEX(), others up to U+10FFFF use
* _UCPTRIE_SMALL_INDEX(), and everything else yields the error value slot at
* dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.
* The comparisons cast c to uint32_t, so negative values fail both range tests
* and take the error path. trie and c are evaluated more than once.
* @internal
*/
#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
((uint32_t)(c) <= (uint32_t)(fastMax) ? \
_UCPTRIE_FAST_INDEX(trie, c) : \
(uint32_t)(c) <= 0x10ffff ? \
_UCPTRIE_SMALL_INDEX(trie, c) : \
(trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
U_CDECL_END
#endif // U_IN_DOXYGEN
#endif // U_HIDE_DRAFT_API
#endif

Просмотреть файл

@ -60,6 +60,7 @@ enum UCurrencyUsage {
UCURR_USAGE_COUNT=2
#endif // U_HIDE_DEPRECATED_API
};
/** Currency Usage used for Decimal Format */
typedef enum UCurrencyUsage UCurrencyUsage;
/**

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше