Backed out 11 changesets (bug 1763783) for causing gtest failures. CLOSED TREE

Backed out changeset 87e552902463 (bug 1763783)
Backed out changeset ed05d313926c (bug 1763783)
Backed out changeset 89b4fcf7e929 (bug 1763783)
Backed out changeset e0dc696a1d53 (bug 1763783)
Backed out changeset e8a743f53265 (bug 1763783)
Backed out changeset 5d3f1290d8ba (bug 1763783)
Backed out changeset 42ef2c926e71 (bug 1763783)
Backed out changeset e346cf03c468 (bug 1763783)
Backed out changeset 81c196a9f7c5 (bug 1763783)
Backed out changeset 51e22286e481 (bug 1763783)
Backed out changeset a2a8cd91233c (bug 1763783)
This commit is contained in:
Marian-Vasile Laza 2022-04-09 10:48:23 -07:00
Родитель 83e72b1d28
Коммит 15bdebfa75
602 изменённых файлов: 5515 добавлений и 73887 удалений

Просмотреть файл

@ -22,4 +22,4 @@
# changes to stick? As of bug 928195, this shouldn't be necessary! Please
# don't change CLOBBER for WebIDL changes any more.
Bug 1763783 - Update to ICU 71 requires clobber
Merge day clobber 2022-04-04

Двоичные данные
config/external/icu/data/icudt71l.dat → config/external/icu/data/icudt70l.dat поставляемый

Двоичный файл не отображается.

1
config/external/icu/i18n/sources.mozbuild поставляемый
Просмотреть файл

@ -318,7 +318,6 @@ EXPORTS.unicode += [
'/intl/icu/source/i18n/unicode/ulocdata.h',
'/intl/icu/source/i18n/unicode/umsg.h',
'/intl/icu/source/i18n/unicode/unirepl.h',
'/intl/icu/source/i18n/unicode/unounclass.h',
'/intl/icu/source/i18n/unicode/unum.h',
'/intl/icu/source/i18n/unicode/unumberformatter.h',
'/intl/icu/source/i18n/unicode/unumberrangeformatter.h',

Просмотреть файл

@ -419,7 +419,8 @@ class DateTimeFormat final {
* plan to remove it.
*/
template <typename B>
ICUResult GetOriginalSkeleton(B& aBuffer) {
ICUResult GetOriginalSkeleton(B& aBuffer,
Maybe<HourCycle> aHourCycle = Nothing()) {
static_assert(std::is_same_v<typename B::CharType, char16_t>);
if (mOriginalSkeleton.length() == 0) {
// Generate a skeleton from the resolved pattern, there was no originally
@ -435,6 +436,10 @@ class DateTimeFormat final {
if (!FillBuffer(mOriginalSkeleton, aBuffer)) {
return Err(ICUError::OutOfMemory);
}
if (aHourCycle) {
DateTimeFormat::ReplaceHourSymbol(Span(aBuffer.data(), aBuffer.length()),
*aHourCycle);
}
return Ok();
}
/**

Просмотреть файл

@ -1,6 +1,6 @@
// Generated by make_intl_data.py. DO NOT EDIT.
// Version: CLDR-41
// URL: https://unicode.org/Public/cldr/41/core.zip
// Version: CLDR-40
// URL: https://unicode.org/Public/cldr/40/core.zip
#include "mozilla/Assertions.h"
#include "mozilla/Span.h"
@ -99,8 +99,8 @@ static bool IsCanonicallyCasedTransformType(mozilla::Span<const char> type) {
#endif
// Mappings from language subtags to preferred values.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@ -219,8 +219,8 @@ bool mozilla::intl::Locale::LanguageMapping(LanguageSubtag& language) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& language) {
MOZ_ASSERT(IsStructurallyValidLanguageTag(language.Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(language.Span()));
@ -241,8 +241,8 @@ bool mozilla::intl::Locale::ComplexLanguageMapping(const LanguageSubtag& languag
}
// Mappings from script subtags to preferred values.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
MOZ_ASSERT(IsStructurallyValidScriptTag(script.Span()));
MOZ_ASSERT(IsCanonicallyCasedScriptTag(script.Span()));
@ -257,8 +257,8 @@ bool mozilla::intl::Locale::ScriptMapping(ScriptSubtag& script) {
}
// Mappings from region subtags to preferred values.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@ -357,8 +357,8 @@ bool mozilla::intl::Locale::RegionMapping(RegionSubtag& region) {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
MOZ_ASSERT(IsStructurallyValidRegionTag(region.Span()));
MOZ_ASSERT(IsCanonicallyCasedRegionTag(region.Span()));
@ -380,8 +380,8 @@ bool mozilla::intl::Locale::ComplexRegionMapping(const RegionSubtag& region) {
}
// Language subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
void mozilla::intl::Locale::PerformComplexLanguageMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
@ -416,8 +416,8 @@ void mozilla::intl::Locale::PerformComplexLanguageMappings() {
}
// Region subtags with complex mappings.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
void mozilla::intl::Locale::PerformComplexRegionMappings() {
MOZ_ASSERT(IsStructurallyValidLanguageTag(Language().Span()));
MOZ_ASSERT(IsCanonicallyCasedLanguageTag(Language().Span()));
@ -643,8 +643,8 @@ static bool IsLessThan(const T& a, const U& b) {
}
// Mappings from variant subtags to preferred values.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::PerformVariantMappings() {
// The variant subtags need to be sorted for binary search.
MOZ_ASSERT(std::is_sorted(mVariants.begin(), mVariants.end(),
@ -707,8 +707,8 @@ bool mozilla::intl::Locale::PerformVariantMappings() {
}
// Canonicalize legacy locale identifiers.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::UpdateLegacyMappings() {
// We're mapping legacy tags to non-legacy form here.
// Other tags remain unchanged.
@ -865,8 +865,8 @@ bool mozilla::intl::Locale::UpdateLegacyMappings() {
}
// Mappings from legacy sign languages.
// Derived from CLDR Supplemental Data, version 41.
// https://unicode.org/Public/cldr/41/core.zip
// Derived from CLDR Supplemental Data, version 40.
// https://unicode.org/Public/cldr/40/core.zip
bool mozilla::intl::Locale::SignLanguageMapping(LanguageSubtag& language,
const RegionSubtag& region) {
MOZ_ASSERT(language.EqualTo("sgn"));
@ -1112,18 +1112,9 @@ const char* mozilla::intl::Locale::ReplaceTransformExtensionType(
}
}
else if (IsTransformKey(key, "m0")) {
if (IsTransformType(type, "beta-metsehaf")) {
return "betamets";
}
if (IsTransformType(type, "ies-jes")) {
return "iesjes";
}
if (IsTransformType(type, "names")) {
return "prprname";
}
if (IsTransformType(type, "tekie-alibekit")) {
return "tekieali";
}
}
return nullptr;
}

Просмотреть файл

@ -378,7 +378,7 @@ Maybe<NumberPartType> GetPartTypeForNumberField(UNumberFormatFields fieldName,
return Some(NumberPartType::Unit);
case UNUM_COMPACT_FIELD:
return Some(NumberPartType::Compact);
#ifndef U_HIDE_DRAFT_API
#if !MOZ_SYSTEM_ICU
case UNUM_APPROXIMATELY_SIGN_FIELD:
return Some(NumberPartType::ApproximatelySign);
#endif

Просмотреть файл

@ -169,8 +169,13 @@ Result<int32_t, ICUError> TimeZone::GetUTCOffsetMs(int64_t aLocalMilliseconds) {
// time starts or the time zone offset is increased due to a time zone rule
// change), t_local must be interpreted using the time zone offset before the
// transition.
#ifndef U_HIDE_DRAFT_API
constexpr UTimeZoneLocalOption skippedTime = UCAL_TZ_LOCAL_FORMER;
constexpr UTimeZoneLocalOption repeatedTime = UCAL_TZ_LOCAL_FORMER;
#else
constexpr UTimeZoneLocalOption skippedTime = UTimeZoneLocalOption(0x4);
constexpr UTimeZoneLocalOption repeatedTime = UTimeZoneLocalOption(0x4);
#endif
UDate date = UDate(aLocalMilliseconds);

Просмотреть файл

@ -0,0 +1,56 @@
# Add a new UNumberFormatFields constant for the approximately sign.
#
# https://unicode-org.atlassian.net/browse/ICU-21765
diff --git a/intl/icu/source/i18n/number_affixutils.cpp b/intl/icu/source/i18n/number_affixutils.cpp
--- a/intl/icu/source/i18n/number_affixutils.cpp
+++ b/intl/icu/source/i18n/number_affixutils.cpp
@@ -131,17 +131,17 @@ UnicodeString AffixUtils::escape(const U
Field AffixUtils::getFieldForType(AffixPatternType type) {
switch (type) {
case TYPE_MINUS_SIGN:
return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD};
case TYPE_PLUS_SIGN:
return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD};
case TYPE_APPROXIMATELY_SIGN:
// TODO: Introduce a new field for the approximately sign?
- return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD};
+ return {UFIELD_CATEGORY_NUMBER, UNUM_APPROXIMATELY_SIGN_FIELD};
case TYPE_PERCENT:
return {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD};
case TYPE_PERMILLE:
return {UFIELD_CATEGORY_NUMBER, UNUM_PERMILL_FIELD};
case TYPE_CURRENCY_SINGLE:
return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
case TYPE_CURRENCY_DOUBLE:
return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
diff --git a/intl/icu/source/i18n/unicode/unum.h b/intl/icu/source/i18n/unicode/unum.h
--- a/intl/icu/source/i18n/unicode/unum.h
+++ b/intl/icu/source/i18n/unicode/unum.h
@@ -397,22 +397,24 @@ typedef enum UNumberFormatFields {
UNUM_PERMILL_FIELD,
/** @stable ICU 49 */
UNUM_SIGN_FIELD,
/** @stable ICU 64 */
UNUM_MEASURE_UNIT_FIELD,
/** @stable ICU 64 */
UNUM_COMPACT_FIELD,
+ UNUM_APPROXIMATELY_SIGN_FIELD,
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UNumberFormatFields value.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UNUM_FIELD_COUNT = UNUM_SIGN_FIELD + 3
+ UNUM_FIELD_COUNT = UNUM_SIGN_FIELD + 4
#endif /* U_HIDE_DEPRECATED_API */
} UNumberFormatFields;
/**
* Selectors with special numeric values to use locale default minimum grouping
* digits for the DecimalFormat/UNumberFormat setMinimumGroupingDigits method.
* Do not use these constants with the [U]NumberFormatter API.

Просмотреть файл

@ -1,5 +1,5 @@
commit c205e7ee49a7086a28b9c275fcfdac9ca3dc815d
Author: yumaoka <y.umaoka@gmail.com>
Date: Wed Mar 30 14:47:46 2022 -0400
commit a56dde820dc35665a66f2e9ee8ba58e75049b668
Author: Shane F. Carr <shane@unicode.org>
Date: Wed Oct 27 15:02:46 2021 -0700
ICU-21971 Added a new numeric currecny code SLE/695 for Sierra Leone Leone.
ICU-21579 Fix warnings in number formatting code

Просмотреть файл

Просмотреть файл

@ -79,7 +79,6 @@ UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
UVector32 &/*foundBreaks*/,
UBool /* isPhraseBreaking */,
UErrorCode &status) const {
if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);

Просмотреть файл

@ -75,7 +75,6 @@ class LanguageBreakEngine : public UMemory {
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const = 0;
};
@ -195,7 +194,6 @@ class UnhandledEngine : public LanguageBreakEngine {
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode &status) const override;
/**

Просмотреть файл

@ -30,7 +30,6 @@
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "unicode/filteredbrk.h"
#include "bytesinkutil.h"
#include "ucln_cmn.h"
#include "cstring.h"
#include "umutex.h"
@ -116,7 +115,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
}
// Create a RuleBasedBreakIterator
result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status);
result = new RuleBasedBreakIterator(file, status);
// If there is a result, set the valid locale and actual locale, and the kind
if (U_SUCCESS(status) && result != NULL) {
@ -409,6 +408,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
if (U_FAILURE(status)) {
return NULL;
}
char lbType[kKeyValueLenMax];
BreakIterator *result = NULL;
switch (kind) {
@ -428,29 +428,18 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
break;
case UBRK_LINE:
{
char lb_lw[kKeyValueLenMax];
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
uprv_strcpy(lb_lw, "line");
uprv_strcpy(lbType, "line");
char lbKeyValue[kKeyValueLenMax] = {0};
UErrorCode kvStatus = U_ZERO_ERROR;
CharString value;
CharStringByteSink valueSink(&value);
loc.getKeywordValue("lb", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus);
if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) {
uprv_strcat(lbType, "_");
uprv_strcat(lbType, lbKeyValue);
}
// lw=phrase is only supported in Japanese.
if (uprv_strcmp(loc.getLanguage(), "ja") == 0) {
value.clear();
loc.getKeywordValue("lw", valueSink, kvStatus);
if (U_SUCCESS(kvStatus) && value == "phrase") {
uprv_strcat(lb_lw, "_");
uprv_strcat(lb_lw, value.data());
}
}
result = BreakIterator::buildInstance(loc, lb_lw, status);
result = BreakIterator::buildInstance(loc, lbType, status);
UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw);
UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue);
UTRACE_EXIT_STATUS(status);
}
break;

Просмотреть файл

@ -58,7 +58,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc71d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc70d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>
@ -70,7 +70,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc71.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc70.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>

Просмотреть файл

@ -125,7 +125,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc71.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc70.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>
@ -148,7 +148,7 @@
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc71d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc70d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>

Просмотреть файл

@ -17,10 +17,7 @@
#include "dictbe.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/resbund.h"
#include "unicode/ubrk.h"
#include "unicode/usetiter.h"
#include "ubrkimpl.h"
#include "utracimp.h"
#include "uvectr32.h"
#include "uvector.h"
@ -51,7 +48,6 @@ DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
(void)startPos; // TODO: remove this param?
@ -72,7 +68,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
}
rangeStart = start;
rangeEnd = current;
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status);
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status);
utext_setNativeIndex(text, current);
return result;
@ -203,13 +199,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai");
UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status);
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(thaiWordSet);
setCharacters(fThaiWordSet);
}
fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = thaiWordSet;
fEndWordSet = fThaiWordSet;
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
@ -234,7 +230,6 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
utext_setNativeIndex(text, rangeStart);
@ -446,13 +441,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo");
UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status);
fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(laoWordSet);
setCharacters(fLaoWordSet);
}
fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = laoWordSet;
fEndWordSet = fLaoWordSet;
fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels
fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters)
fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent)
@ -474,7 +469,6 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
@ -643,13 +637,14 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr");
fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status);
fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fEndWordSet);
setCharacters(fBurmeseWordSet);
}
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = fBurmeseWordSet;
fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels
// Compact for caching.
fMarkSet.compact();
@ -667,7 +662,6 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status ) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
@ -836,13 +830,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(khmerWordSet);
setCharacters(fKhmerWordSet);
}
fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
fEndWordSet = khmerWordSet;
fEndWordSet = fKhmerWordSet;
fBeginWordSet.add(0x1780, 0x17B3);
//fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
//fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
@ -873,7 +867,6 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status ) const {
if (U_FAILURE(status)) return 0;
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
@ -1057,27 +1050,25 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType
: DictionaryBreakEngine(), fDictionary(adoptDictionary) {
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani");
nfkcNorm2 = Normalizer2::getNFKCInstance(status);
// Korean dictionary only includes Hangul syllables
fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status);
fHangulWordSet.compact();
// Digits, open punctuation and Alphabetic characters.
fDigitOrOpenPunctuationOrAlphabetSet.applyPattern(
UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status);
fDigitOrOpenPunctuationOrAlphabetSet.compact();
fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status);
fClosePunctuationSet.compact();
fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
nfkcNorm2 = Normalizer2::getNFKCInstance(status);
if (U_SUCCESS(status)) {
// handle Korean and Japanese/Chinese using different dictionaries
if (type == kKorean) {
if (U_SUCCESS(status)) {
setCharacters(fHangulWordSet);
}
} else { //Chinese and Japanese
UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status);
if (U_SUCCESS(status)) {
UnicodeSet cjSet;
cjSet.addAll(fHanWordSet);
cjSet.addAll(fKatakanaWordSet);
cjSet.addAll(fHiraganaWordSet);
cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
setCharacters(cjSet);
initJapanesePhraseParameter(status);
}
}
UTRACE_EXIT_STATUS(status);
@ -1105,6 +1096,7 @@ static inline bool isKatakana(UChar32 value) {
(value >= 0xFF66 && value <= 0xFF9f);
}
// Function for accessing internal utext flags.
// Replicates an internal UText function.
@ -1112,6 +1104,7 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
return (int32_t)1 << bitIndex;
}
/*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
@ -1124,7 +1117,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
if (rangeStart >= rangeEnd) {
@ -1355,31 +1347,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
t_boundary.addElement(numCodePts, status);
numBreaks++;
} else if (isPhraseBreaking) {
t_boundary.addElement(numCodePts, status);
if(U_SUCCESS(status)) {
numBreaks++;
int32_t prevIdx = numCodePts;
int32_t codeUnitIdx = -1;
int32_t prevCodeUnitIdx = -1;
int32_t length = -1;
for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) {
codeUnitIdx = inString.moveIndex32(0, i);
prevCodeUnitIdx = inString.moveIndex32(0, prevIdx);
// Calculate the length by using the code unit.
length = prevCodeUnitIdx - codeUnitIdx;
prevIdx = i;
// Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana
// characters don't occur.
if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length))
&& (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1)))
|| !isKatakana(inString.char32At(codeUnitIdx)))) {
t_boundary.addElement(i, status);
numBreaks++;
}
}
}
} else {
for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) {
t_boundary.addElement(i, status);
@ -1400,7 +1367,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
// while reversing t_boundary and pushing values to foundBreaks.
int32_t prevCPPos = -1;
int32_t prevUTextPos = -1;
int32_t correctedNumBreaks = 0;
for (int32_t i = numBreaks-1; i >= 0; i--) {
int32_t cpPos = t_boundary.elementAti(i);
U_ASSERT(cpPos > prevCPPos);
@ -1409,15 +1375,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
if (utextPos > prevUTextPos) {
// Boundaries are added to foundBreaks output in ascending order.
U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
// In phrase breaking, there has to be a breakpoint between Cj character and close
// punctuation.
// E.g.[携帯電話]正しい選択 -> [携帯▁電話]▁正しい▁選択 -> breakpoint between ] and 正
if (utextPos != rangeStart
|| (isPhraseBreaking && utextPos > 0
&& fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) {
foundBreaks.push(utextPos, status);
correctedNumBreaks++;
}
} else {
// Normalization expanded the input text, the dictionary found a boundary
// within the expansion, giving two boundaries with the same index in the
@ -1429,52 +1387,9 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
}
(void)prevCPPos; // suppress compiler warnings about unused variable
UChar32 nextChar = utext_char32At(inText, rangeEnd);
if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) {
// In phrase breaking, there has to be a breakpoint between Cj character and
// the number/open punctuation.
// E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「
// E.g. 乗車率90%程度だろうか -> 乗車▁率▁90%▁程度だろうか -> breakpoint between 率 and 9
// E.g. しかもロゴがUnicode! -> しかも▁ロゴが▁Unicode!-> breakpoint between が and U
if (isPhraseBreaking) {
if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) {
foundBreaks.popi();
correctedNumBreaks--;
}
} else {
foundBreaks.popi();
correctedNumBreaks--;
}
}
// inString goes out of scope
// inputMap goes out of scope
return correctedNumBreaks;
}
void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) {
loadJapaneseExtensions(error);
loadHiragana(error);
}
void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) {
const char* tag = "extensions";
ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error);
if (U_SUCCESS(error)) {
ResourceBundle bundle = ja.get(tag, error);
while (U_SUCCESS(error) && bundle.hasNext()) {
fSkipSet.puti(bundle.getNextString(error), 1, error);
}
}
}
void CjkBreakEngine::loadHiragana(UErrorCode& error) {
UnicodeSet hiraganaWordSet(UnicodeString(u"[:Hiragana:]"), error);
hiraganaWordSet.compact();
UnicodeSetIterator iterator(hiraganaWordSet);
while (iterator.next()) {
fSkipSet.puti(UnicodeString(iterator.getCodepoint()), 1, error);
}
return numBreaks;
}
#endif

Просмотреть файл

@ -15,7 +15,6 @@
#include "unicode/utext.h"
#include "brkeng.h"
#include "hash.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
@ -81,7 +80,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status ) const override;
protected:
@ -107,7 +105,6 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const = 0;
};
@ -130,6 +127,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fThaiWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fSuffixSet;
@ -166,7 +164,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@ -189,6 +186,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fLaoWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@ -224,7 +222,6 @@ class LaoBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@ -247,6 +244,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fBurmeseWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@ -282,7 +280,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@ -305,6 +302,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fKhmerWordSet;
UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
UnicodeSet fMarkSet;
@ -340,7 +338,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};
@ -369,22 +366,13 @@ class CjkBreakEngine : public DictionaryBreakEngine {
* @internal
*/
UnicodeSet fHangulWordSet;
UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet;
UnicodeSet fClosePunctuationSet;
UnicodeSet fHanWordSet;
UnicodeSet fKatakanaWordSet;
UnicodeSet fHiraganaWordSet;
DictionaryMatcher *fDictionary;
const Normalizer2 *nfkcNorm2;
private:
// Load Japanese extensions.
void loadJapaneseExtensions(UErrorCode& error);
// Load Japanese Hiragana.
void loadHiragana(UErrorCode& error);
// Initialize fSkipSet by loading Japanese Hiragana and extensions.
void initJapanesePhraseParameter(UErrorCode& error);
Hashtable fSkipSet;
public:
/**
@ -416,7 +404,6 @@ class CjkBreakEngine : public DictionaryBreakEngine {
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
};

Просмотреть файл

@ -168,9 +168,12 @@ void LocaleMatcher::Builder::clearSupportedLocales() {
bool LocaleMatcher::Builder::ensureSupportedLocaleVector() {
if (U_FAILURE(errorCode_)) { return false; }
if (supportedLocales_ != nullptr) { return true; }
LocalPointer<UVector> lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_);
supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_);
if (U_FAILURE(errorCode_)) { return false; }
supportedLocales_ = lpSupportedLocales.orphan();
if (supportedLocales_ == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return false;
}
return true;
}
@ -184,8 +187,9 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
for (int32_t i = 0; i < length; ++i) {
Locale *locale = list.orphanLocaleAt(i);
if (locale == nullptr) { continue; }
supportedLocales_->adoptElement(locale, errorCode_);
supportedLocales_->addElementX(locale, errorCode_);
if (U_FAILURE(errorCode_)) {
delete locale;
break;
}
}
@ -193,21 +197,35 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
}
LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) {
if (ensureSupportedLocaleVector()) {
if (U_FAILURE(errorCode_)) { return *this; }
clearSupportedLocales();
while (locales.hasNext() && U_SUCCESS(errorCode_)) {
if (!ensureSupportedLocaleVector()) { return *this; }
while (locales.hasNext()) {
const Locale &locale = locales.next();
LocalPointer<Locale> clone (locale.clone(), errorCode_);
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
Locale *clone = locale.clone();
if (clone == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
break;
}
supportedLocales_->addElementX(clone, errorCode_);
if (U_FAILURE(errorCode_)) {
delete clone;
break;
}
}
return *this;
}
LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) {
if (ensureSupportedLocaleVector()) {
LocalPointer<Locale> clone(locale.clone(), errorCode_);
supportedLocales_->adoptElement(clone.orphan(), errorCode_);
if (!ensureSupportedLocaleVector()) { return *this; }
Locale *clone = locale.clone();
if (clone == nullptr) {
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
supportedLocales_->addElementX(clone, errorCode_);
if (U_FAILURE(errorCode_)) {
delete clone;
}
return *this;
}

Просмотреть файл

@ -1204,11 +1204,14 @@ AliasReplacer::parseLanguageReplacement(
// We have multiple field so we have to allocate and parse
CharString* str = new CharString(
replacement, (int32_t)uprv_strlen(replacement), status);
LocalPointer<CharString> lpStr(str, status);
toBeFreed.adoptElement(lpStr.orphan(), status);
if (U_FAILURE(status)) {
return;
}
if (str == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
toBeFreed.addElementX(str, status);
char* data = str->data();
replacedLanguage = (const char*) data;
char* endOfField = uprv_strchr(data, '_');
@ -1417,9 +1420,12 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
(int32_t)(firstSpace - replacement), status), status);
}
if (U_FAILURE(status)) { return false; }
if (item.isNull()) {
status = U_MEMORY_ALLOCATION_ERROR;
return false;
}
replacedRegion = item->data();
toBeFreed.adoptElement(item.orphan(), status);
if (U_FAILURE(status)) { return false; }
toBeFreed.addElementX(item.orphan(), status);
}
U_ASSERT(!same(region, replacedRegion));
region = replacedRegion;
@ -1653,10 +1659,10 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
U_SUCCESS(status)) {
*end = NULL_CHAR; // null terminate inside variantsBuff
variants.addElement(start, status);
variants.addElementX(start, status);
start = end + 1;
}
variants.addElement(start, status);
variants.addElementX(start, status);
}
if (U_FAILURE(status)) { return false; }

Просмотреть файл

@ -1,8 +1,8 @@
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <complex>
#include <utility>
#include <ctgmath>
#include "unicode/utypes.h"
@ -639,7 +639,6 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
int32_t beginFoundBreakSize = foundBreaks.size();

Просмотреть файл

@ -62,7 +62,6 @@ protected:
int32_t rangeStart,
int32_t rangeEnd,
UVector32 &foundBreaks,
UBool isPhraseBreaking,
UErrorCode& status) const override;
private:
const LSTMData* fData;

Просмотреть файл

@ -2496,18 +2496,15 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
// origin is not the first character, or it is U+0000.
UnicodeSet *set;
if((canonValue&CANON_HAS_SET)==0) {
LocalPointer<UnicodeSet> lpSet(new UnicodeSet, errorCode);
set=lpSet.getAlias();
if(U_FAILURE(errorCode)) {
set=new UnicodeSet;
if(set==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
canonStartSets.adoptElement(lpSet.orphan(), errorCode);
if (U_FAILURE(errorCode)) {
return;
}
canonStartSets.addElementX(set, errorCode);
if(firstOrigin!=0) {
set->add(firstOrigin);
}

Просмотреть файл

@ -82,19 +82,6 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode
}
}
//-------------------------------------------------------------------------------
//
// Constructor from a UDataMemory handle to precompiled break rules
// stored in an ICU data file. This constructor is private API,
// only for internal use.
//
// Delegates to the (udm, status) constructor and then stores the caller's
// isPhraseBreaking flag in fIsPhraseBreaking. The flag is stored
// unconditionally, i.e. also when the delegated constructor has reported
// a failure through status.
//
//-------------------------------------------------------------------------------
RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UBool isPhraseBreaking,
UErrorCode &status) : RuleBasedBreakIterator(udm, status)
{
fIsPhraseBreaking = isPhraseBreaking;
}
//
// Construct from precompiled binary rules (tables). This constructor is public API,
// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
@ -335,7 +322,6 @@ void RuleBasedBreakIterator::init(UErrorCode &status) {
fBreakCache = nullptr;
fDictionaryCache = nullptr;
fLookAheadMatches = nullptr;
fIsPhraseBreaking = false;
// Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER.
// fText = UTEXT_INITIALIZER;

Просмотреть файл

@ -163,7 +163,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
if (lbe != NULL) {
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, status);
}
// Reload the loop variables for the next go-round

Просмотреть файл

@ -625,7 +625,10 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
}
}
LocalPointer<UnicodeString> idClone(id->clone(), status);
LocalPointer<UnicodeString> idClone(new UnicodeString(*id), status);
if (U_SUCCESS(status) && idClone->isBogus()) {
status = U_MEMORY_ALLOCATION_ERROR;
}
result.adoptElement(idClone.orphan(), status);
}
delete fallbackKey;

Просмотреть файл

@ -179,8 +179,7 @@ private:
length = other._ids.size();
for(i = 0; i < length; ++i) {
LocalPointer<UnicodeString> clonedId(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
_ids.adoptElement(clonedId.orphan(), status);
_ids.addElementX(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
}
if(U_SUCCESS(status)) {

Просмотреть файл

@ -49,11 +49,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
if (acceptsListener(*l)) {
Mutex lmx(&notifyLock);
if (listeners == NULL) {
LocalPointer<UVector> lpListeners(new UVector(5, status), status);
if (U_FAILURE(status)) {
return;
}
listeners = lpListeners.orphan();
listeners = new UVector(5, status);
} else {
for (int i = 0, e = listeners->size(); i < e; ++i) {
const EventListener* el = (const EventListener*)(listeners->elementAt(i));
@ -63,7 +59,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
}
}
listeners->addElement((void*)l, status); // cast away const
listeners->addElementX((void*)l, status); // cast away const
}
#ifdef NOTIFIER_DEBUG
else {
@ -106,6 +102,7 @@ ICUNotifier::removeListener(const EventListener *l, UErrorCode& status)
void
ICUNotifier::notifyChanged(void)
{
if (listeners != NULL) {
Mutex lmx(&notifyLock);
if (listeners != NULL) {
for (int i = 0, e = listeners->size(); i < e; ++i) {
@ -114,6 +111,7 @@ ICUNotifier::notifyChanged(void)
}
}
}
}
U_NAMESPACE_END

Просмотреть файл

@ -168,7 +168,7 @@ ubrk_safeClone(
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
if (newBI == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
} else if (pBufferSize != NULL) {
} else {
*status = U_SAFECLONE_ALLOCATED_WARNING;
}
return (UBreakIterator *)newBI;
@ -176,7 +176,15 @@ ubrk_safeClone(
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
return ubrk_safeClone(bi, nullptr, nullptr, status);
if (U_FAILURE(*status)) {
return nullptr;
}
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
if (newBI == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
return (UBreakIterator *)newBI;
}

Просмотреть файл

@ -22,14 +22,27 @@
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uset.h"
#include "unicode/udata.h" /* UDataInfo */
#include "unicode/utf16.h"
#include "cmemory.h"
#include "uassert.h"
#include "ucase.h"
#include "ucmndata.h" /* DataHeader */
#include "udatamem.h"
#include "umutex.h"
#include "uassert.h"
#include "cmemory.h"
#include "utrie2.h"
#include "ucase.h"
/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
/*
 * Record type for a set of case-properties data.
 * The instance used throughout this file is ucase_props_singleton
 * (presumably defined in the ucase_props_data.h included below -- confirm).
 */
struct UCaseProps {
UDataMemory *mem;
const int32_t *indexes;
const uint16_t *exceptions;
const uint16_t *unfold;
UTrie2 trie; /* per-code-point props are read from this trie with UTRIE2_GET16 */
uint8_t formatVersion[4];
};
/* ucase_props_data.h is machine-generated by gencase --csource */
#define INCLUDED_FROM_UCASE_CPP
#include "ucase_props_data.h"
@ -64,13 +77,6 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* data access primitives --------------------------------------------------- */
/**
 * Returns the address of ucase_props_singleton.
 *
 * @param pExceptionsLength output: receives UPRV_LENGTHOF(ucase_props_exceptions)
 * @param pUnfoldLength output: receives UPRV_LENGTHOF(ucase_props_unfold)
 * @return pointer to ucase_props_singleton
 *
 * Both output pointers are written unconditionally, so neither may be null.
 */
U_CAPI const struct UCaseProps * U_EXPORT2
ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) {
*pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);
*pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);
return &ucase_props_singleton;
}
U_CFUNC const UTrie2 * U_EXPORT2
ucase_getTrie() {
return &ucase_props_singleton.trie;
@ -1058,8 +1064,6 @@ ucase_toFullLower(UChar32 c,
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
// Reset the output pointer in case it was uninitialized.
*pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {
@ -1144,6 +1148,7 @@ ucase_toFullLower(UChar32 c,
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
*pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/*
@ -1210,8 +1215,6 @@ toUpperOrTitle(UChar32 c,
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
// Reset the output pointer in case it was uninitialized.
*pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
@ -1249,6 +1252,7 @@ toUpperOrTitle(UChar32 c,
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
*/
*pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else if(c==0x0587) {
// See ICU-13416:
@ -1445,8 +1449,6 @@ ucase_toFullFolding(UChar32 c,
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
// Reset the output pointer in case it was uninitialized.
*pString=nullptr;
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
if(!UCASE_HAS_EXCEPTION(props)) {
if(UCASE_IS_UPPER_OR_TITLE(props)) {

Просмотреть файл

@ -312,21 +312,6 @@ UCaseMapFull(UChar32 c,
U_CDECL_END
/* for icuexportdata -------------------------------------------------------- */
/*
 * Record type for a set of case-properties data; the type returned
 * (by const pointer) from ucase_getSingleton() declared below.
 */
struct UCaseProps {
void *mem; // TODO: was unused, and type UDataMemory -- remove
const int32_t *indexes;
const uint16_t *exceptions;
const uint16_t *unfold;
UTrie2 trie;
uint8_t formatVersion[4];
};
/*
 * Returns the case-properties record and reports two lengths through the
 * output parameters pExceptionsLength and pUnfoldLength
 * (presumably the element counts of the exceptions and unfold arrays -- confirm
 * against the definition).
 */
U_CAPI const struct UCaseProps * U_EXPORT2
ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength);
/* file definitions --------------------------------------------------------- */
#define UCASE_DATA_NAME "ucase"

Просмотреть файл

@ -113,6 +113,7 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
if(U_SUCCESS(*pErrorCode)) {
csm->caseLocale=UCASE_LOC_UNKNOWN;
csm->caseLocale = ucase_getCaseLocale(csm->locale);
} else {
csm->locale[0]=0;
@ -419,97 +420,6 @@ void toUpper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
namespace {
// The two bytes of U+0301 COMBINING ACUTE ACCENT as encoded in the u8 string literal.
constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0];
constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1];
/**
* UTF-8 helper for Dutch titlecasing of a word-initial "IJ".
*
* Input: c is a letter I with or without acute accent.
* start is the index in src after c, and is less than segmentLimit.
* If a plain i/I is followed by a plain j/J,
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
* then we output accordingly.
* Nothing is written to the sink unless the whole pattern matches and the
* sequence is not followed by another combining mark.
*
* @param src the source bytes (read as UTF-8)
* @param c the first letter; anything other than u'I' is treated as I with acute
* @param start index in src just after c
* @param segmentLimit exclusive upper bound for reads from src
* @param sink receives the remainder of the IJ sequence
* @param options passed through to ByteSinkUtil::appendUnchanged
* @param edits passed through to the ByteSinkUtil append functions
* @param errorCode passed through to ByteSinkUtil::appendUnchanged
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
*/
int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
U_ASSERT(start < segmentLimit);
int32_t index = start;
bool withAcute = false;
// If the conditions are met, then the following variables tell us what to output.
int32_t unchanged1 = 0; // bytes before the j, or the whole sequence (0..5 in UTF-8)
bool doTitleJ = false; // true if the j needs to be titlecased
int32_t unchanged2 = 0; // bytes after the j (0 or 2 in UTF-8)
// next byte after the first letter
UChar32 c2;
c2 = src[index++];
// Is the first letter an i/I with accent?
if (c == u'I') {
// Decomposed form: plain I followed by the two bytes of the combining acute.
// Note that index advances past the second byte only when it is actually read.
if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
withAcute = true;
unchanged1 = 2; // ACUTE is 2 code units in UTF-8
if (index == segmentLimit) { return start; }
c2 = src[index++];
}
} else { // Í
withAcute = true;
}
// Is the next character a j/J?
if (c2 == u'j') {
doTitleJ = true;
} else if (c2 == u'J') {
++unchanged1;
} else {
return start;
}
// A plain i/I must be followed by a plain j/J.
// An i/I with acute must be followed by a j/J with acute.
if (withAcute) {
// The first test guarantees that both acute bytes lie below segmentLimit before they are read.
if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {
return start;
}
if (doTitleJ) {
unchanged2 = 2; // ACUTE is 2 code units in UTF-8
} else {
unchanged1 = unchanged1 + 2; // ACUTE is 2 code units in UTF-8
}
}
// There must not be another combining mark.
if (index < segmentLimit) {
int32_t cp;
int32_t i = index;
U8_NEXT(src, i, segmentLimit, cp);
uint32_t typeMask = U_GET_GC_MASK(cp);
if ((typeMask & U_GC_M_MASK) != 0) {
return start;
}
}
// Output the rest of the Dutch IJ.
// NOTE(review): the results of appendUnchanged are not checked here, although another
// call site in this file tests its result and returns early -- confirm this is intended.
ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);
start += unchanged1;
if (doTitleJ) {
ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);
++start;
}
ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);
// Everything consumed from src must have been accounted for in the output.
U_ASSERT(start + unchanged2 == index);
return index;
}
} // namespace
U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(
int32_t caseLocale, uint32_t options, BreakIterator *iter,
@ -594,14 +504,19 @@ ucasemap_internalUTF8ToTitle(
}
/* Special case Dutch IJ titlecasing */
if (titleLimit < index &&
caseLocale == UCASE_LOC_DUTCH) {
if (c < 0) {
c = ~c;
if (titleStart+1 < index &&
caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
if (src[titleStart+1] == 0x006A) {
ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
titleLimit++;
} else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased.
if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
sink, options, edits, errorCode)) {
return;
}
if (c == u'I' || c == u'Í') {
titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode);
titleLimit++;
}
}

Просмотреть файл

@ -252,10 +252,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
UTRACE_EXIT_STATUS(*status);
return NULL;
}
// If pBufferSize was NULL as the input, pBufferSize is set to &stackBufferSize in this function.
if (pBufferSize != &stackBufferSize) {
*status = U_SAFECLONE_ALLOCATED_WARNING;
}
/* record the fact that memory was allocated */
*pBufferSize = bufferSizeNeeded;
@ -320,11 +317,7 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U
return localConverter;
}
/**
 * Clones a converter by forwarding to ucnv_safeClone() with no stack buffer
 * and no buffer-size pointer (both nullptr).
 *
 * @param cnv the converter to clone
 * @param status passed through to ucnv_safeClone()
 * @return whatever ucnv_safeClone() returns
 */
U_CAPI UConverter* U_EXPORT2
ucnv_clone(const UConverter* cnv, UErrorCode *status)
{
return ucnv_safeClone(cnv, nullptr, nullptr, status);
}
/*Decreases the reference counter in the shared immutable section of the object
*and frees the mutable part*/

Просмотреть файл

@ -254,7 +254,7 @@ currSymbolsEquiv_cleanup(void)
}
/**
* Deleter for IsoCodeEntry
* Deleter for OlsonToMetaMappingEntry
*/
static void U_CALLCONV
deleteIsoCodeEntry(void *obj) {

Просмотреть файл

@ -186,10 +186,10 @@ NULL
};
static const char* const DEPRECATED_LANGUAGES[]={
"in", "iw", "ji", "jw", "mo", NULL, NULL
"in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
"id", "he", "yi", "jv", "ro", NULL, NULL
"id", "he", "yi", "jv", NULL, NULL
};
/**
@ -444,7 +444,7 @@ static const char * const COUNTRIES_3[] = {
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
"WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
"WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",

Просмотреть файл

@ -461,13 +461,13 @@ public:
* Option for whether to include or ignore one-way (fallback) match data.
* By default, they are included.
*
* @param matchDirection the match direction to set.
* @param direction the match direction to set.
* @return this Builder object
* @stable ICU 67
*/
Builder &setDirection(ULocMatchDirection matchDirection) {
Builder &setDirection(ULocMatchDirection direction) {
if (U_SUCCESS(errorCode_)) {
direction_ = matchDirection;
direction_ = direction;
}
return *this;
}

Просмотреть файл

@ -147,11 +147,6 @@ private:
*/
int32_t *fLookAheadMatches;
/**
* A flag to indicate if phrase based breaking is enabled.
*/
UBool fIsPhraseBreaking;
//=======================================================================
// constructors
//=======================================================================
@ -168,21 +163,6 @@ private:
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
/**
* This constructor uses the udata interface to create a BreakIterator
* whose internal tables live in a memory-mapped file. "image" is an
* ICU UDataMemory handle for the pre-compiled break iterator tables.
* @param image handle to the memory image for the break iterator data.
* Ownership of the UDataMemory handle passes to the Break Iterator,
* which will be responsible for closing it when it is no longer needed.
* @param status Information on any errors encountered.
* @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
* @see udata_open
* @see #getBinaryRules
* @internal (private)
*/
RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
/** @internal */
friend class RBBIRuleBuilder;
/** @internal */

Просмотреть файл

@ -312,12 +312,11 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
* If *pBufferSize is not enough for a stack-based safe clone,
* new memory will be allocated.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used
* if pBufferSize != NULL and any allocations were necessary
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING, is used if any allocations were necessary.
* @return pointer to the new clone
* @deprecated ICU 69 Use ubrk_clone() instead.
*/
U_DEPRECATED UBreakIterator * U_EXPORT2
U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(
const UBreakIterator *bi,
void *stackBuffer,
@ -326,17 +325,21 @@ ubrk_safeClone(
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Thread safe cloning operation.
* @param bi iterator to be cloned
* @param status to indicate whether the operation went on smoothly or there were errors
* @return pointer to the new clone
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi,
UErrorCode *status);
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**

Просмотреть файл

@ -477,7 +477,7 @@ ucnv_openCCSID(int32_t codepage,
*
* <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
* stored in the converter cache or the alias table. The only way to open further converters
* is to call this function multiple times, or use the ucnv_clone() function to clone a
* is to call this function multiple times, or use the ucnv_safeClone() function to clone a
* 'primary' converter.</p>
*
* <p>A future version of ICU may add alias table lookups and/or caching
@ -493,27 +493,13 @@ ucnv_openCCSID(int32_t codepage,
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see udata_open
* @see ucnv_open
* @see ucnv_clone
* @see ucnv_safeClone
* @see ucnv_close
* @stable ICU 2.2
*/
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
/**
* Thread safe converter cloning operation.
*
* You must ucnv_close() the clone.
*
* @param cnv converter to be cloned
* @param status to indicate whether the operation went on smoothly or there were errors
* @return pointer to the new clone
* @stable ICU 71
*/
U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status);
#ifndef U_HIDE_DEPRECATED_API
/**
* Thread safe converter cloning operation.
* For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
@ -546,19 +532,21 @@ U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *statu
* pointer to size of allocated space.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
* is used if pBufferSize != NULL and any allocations were necessary
* is used if any allocations were necessary.
* However, it is better to check if *pBufferSize grew for checking for
* allocations because warning codes can be overridden by subsequent
* function calls.
* @return pointer to the new clone
* @deprecated ICU 71 Use ucnv_clone() instead.
* @stable ICU 2.0
*/
U_DEPRECATED UConverter * U_EXPORT2
U_CAPI UConverter * U_EXPORT2
ucnv_safeClone(const UConverter *cnv,
void *stackBuffer,
int32_t *pBufferSize,
UErrorCode *status);
#ifndef U_HIDE_DEPRECATED_API
/**
* \def U_CNV_SAFECLONE_BUFFERSIZE
* Definition of a buffer size that is designed to be large enough for

Просмотреть файл

@ -1229,6 +1229,7 @@ public:
*/
UnicodeSet& retain(UChar32 c);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
@ -1237,9 +1238,10 @@ public:
*
* @param s the source string
* @return this object, for chaining
* @stable ICU 69
* @draft ICU 69
*/
UnicodeSet& retain(const UnicodeString &s);
#endif // U_HIDE_DRAFT_API
/**
* Removes the specified range from this set if it is present.

Просмотреть файл

@ -567,7 +567,6 @@
#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
@ -631,7 +630,6 @@
#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
#define ucnv_clone U_ICU_ENTRY_POINT_RENAME(ucnv_clone)
#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
@ -727,7 +725,6 @@
#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
#define ucol_clone U_ICU_ENTRY_POINT_RENAME(ucol_clone)
#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
@ -907,7 +904,6 @@
#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
#define udatpg_getDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormatForStyle)
#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
#define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle)
#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
@ -922,7 +918,6 @@
#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
#define udatpg_setDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormatForStyle)
#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)

Просмотреть файл

@ -628,6 +628,7 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen);
#ifndef U_HIDE_DRAFT_API
/**
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
@ -635,10 +636,11 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen);
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Removes from this set all of its elements that are contained in the
@ -669,6 +671,7 @@ uset_removeAll(USet* set, const USet* removeSet);
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
@ -678,7 +681,7 @@ uset_retain(USet* set, UChar32 start, UChar32 end);
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length);
@ -690,10 +693,11 @@ uset_retainString(USet *set, const UChar *str, int32_t length);
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Retains only the elements in this set that are contained in the
@ -737,6 +741,7 @@ uset_compact(USet* set);
U_CAPI void U_EXPORT2
uset_complement(USet* set);
#ifndef U_HIDE_DRAFT_API
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
@ -748,7 +753,7 @@ uset_complement(USet* set);
* @param set the object to be modified
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end);
@ -761,7 +766,7 @@ uset_complementRange(USet *set, UChar32 start, UChar32 end);
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length);
@ -773,10 +778,11 @@ uset_complementString(USet *set, const UChar *str, int32_t length);
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @stable ICU 69
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Complements in this set all elements contained in the specified

Просмотреть файл

@ -60,7 +60,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 71
#define U_ICU_VERSION_MAJOR_NUM 70
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@ -86,7 +86,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _71
#define U_ICU_VERSION_SUFFIX _70
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -139,7 +139,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "71.1"
#define U_ICU_VERSION "70.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -152,13 +152,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "71"
#define U_ICU_VERSION_SHORT "70"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "71.1"
#define U_ICU_DATA_VERSION "70.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

Просмотреть файл

@ -334,8 +334,7 @@ Replaceable::clone() const {
// UnicodeString overrides clone() with a real implementation
UnicodeString *
UnicodeString::clone() const {
LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
return new UnicodeString(*this);
}
//========================================
@ -1977,12 +1976,7 @@ The vector deleting destructor is already a part of UObject,
but defining it here makes sure that it is included with this object file.
This makes sure that static library dependencies are kept to a minimum.
*/
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static void uprv_UnicodeStringDummy(void) {
delete [] (new UnicodeString[2]);
}
#pragma GCC diagnostic pop
#endif
#endif

Просмотреть файл

@ -36,12 +36,6 @@
#include "ustr_imp.h"
#include "uassert.h"
/**
* Code point for COMBINING ACUTE ACCENT
* @internal
*/
#define ACUTE u'\u0301'
U_NAMESPACE_BEGIN
namespace {
@ -402,94 +396,6 @@ U_NAMESPACE_USE
#if !UCONFIG_NO_BREAK_ITERATION
namespace {
/**
* Titlecases the remainder of a Dutch "IJ" digraph, if one is present.
*
* Input: c is a letter I with or without acute accent.
* start is the index in src after c, and is less than segmentLimit.
* If a plain i/I is followed by a plain j/J,
* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
* then we output accordingly.
*
* All pattern checks happen before anything is written: every early
* "return start" leaves dest, destIndex and edits untouched.
*
* @param src          source text; read at indexes start..segmentLimit-1
* @param c            the first letter of the candidate sequence; anything other
*                     than u'I' is treated as the precomposed I-with-acute
* @param start        index in src of the code unit right after c
* @param segmentLimit exclusive end of the readable range in src
* @param dest         output buffer, written through appendUnchanged()/appendUChar()
* @param destIndex    in/out write position in dest; updated with the helpers' results
* @param destCapacity capacity of dest, forwarded to the append helpers
* @param options      forwarded to appendUnchanged()
* @param edits        may be nullptr; otherwise receives addReplace(1, 1) when a
*                     lowercase j is replaced by J, and is forwarded to appendUnchanged()
* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
*/
int32_t maybeTitleDutchIJ(const UChar *src, UChar32 c, int32_t start, int32_t segmentLimit,
UChar *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
icu::Edits *edits) {
U_ASSERT(start < segmentLimit);
// index is the read cursor; start stays put until the output phase so that
// "return start" can report "nothing consumed".
int32_t index = start;
bool withAcute = false;
// If the conditions are met, then the following variables tell us what to output.
int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3)
bool doTitleJ = false; // true if the j needs to be titlecased
int32_t unchanged2 = 0; // after the j (0 or 1)
// next character after the first letter
UChar c2 = src[index++];
// Is the first letter an i/I with accent?
if (c == u'I') {
// Decomposed form: plain I followed by a combining acute.
if (c2 == ACUTE) {
withAcute = true;
unchanged1 = 1; // the combining acute after the I is copied as-is
if (index == segmentLimit) { return start; } // nothing left that could be a j/J
c2 = src[index++];
}
} else { // Í
withAcute = true;
}
// Is the next character a j/J?
if (c2 == u'j') {
doTitleJ = true;
} else if (c2 == u'J') {
++unchanged1; // already uppercase: copy it unchanged with the preceding units
} else {
return start;
}
// A plain i/I must be followed by a plain j/J.
// An i/I with acute must be followed by a j/J with acute.
if (withAcute) {
if (index == segmentLimit || src[index++] != ACUTE) { return start; }
// The acute on the j/J is copied unchanged: after the new J when the j is
// replaced, otherwise as part of the leading unchanged run.
if (doTitleJ) {
unchanged2 = 1;
} else {
++unchanged1;
}
}
// There must not be another combining mark.
if (index < segmentLimit) {
int32_t cp;
// Peek with a scratch cursor so that index itself is not advanced.
int32_t i = index;
U16_NEXT(src, i, segmentLimit, cp);
uint32_t typeMask = U_GET_GC_MASK(cp);
if ((typeMask & U_GC_M_MASK) != 0) {
return start;
}
}
// Output the rest of the Dutch IJ.
// NOTE(review): the results of the append helpers are stored in destIndex
// without being checked here; confirm that the caller handles a negative
// destIndex (overflow) after this function returns.
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
start += unchanged1;
if (doTitleJ) {
destIndex = appendUChar(dest, destIndex, destCapacity, u'J');
if (edits != nullptr) {
edits->addReplace(1, 1); // one code unit (j) replaced by one code unit (J)
}
++start;
}
destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
// Every code unit that was read must have been accounted for in the output.
U_ASSERT(start + unchanged2 == index);
return index;
}
} // namespace
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
UChar *dest, int32_t destCapacity,
@ -506,14 +412,14 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
csc.limit=srcLength;
int32_t destIndex=0;
int32_t prev=0;
bool isFirstIndex=true;
UBool isFirstIndex=TRUE;
/* titlecasing loop */
while(prev<srcLength) {
/* find next index where to titlecase */
int32_t index;
if(isFirstIndex) {
isFirstIndex=false;
isFirstIndex=FALSE;
index=iter->first();
} else {
index=iter->next();
@ -540,7 +446,7 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
// Stop with titleStart<titleLimit<=index
// if there is a character to be titlecased,
// or else stop with titleStart==titleLimit==index.
bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
titleStart=titleLimit;
if(titleLimit==index) {
@ -573,15 +479,27 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it
/* Special case Dutch IJ titlecasing */
if (titleStart+1 < index &&
caseLocale == UCASE_LOC_DUTCH) {
if (c < 0) {
c = ~c;
caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
if (src[titleStart+1] == 0x006A) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if (c == u'I' || c == u'Í') {
titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index,
dest, destIndex, destCapacity, options,
edits);
if(edits!=NULL) {
edits->addReplace(1, 1);
}
titleLimit++;
} else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleStart+1, 1, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
titleLimit++;
}
}

Просмотреть файл

@ -99,6 +99,14 @@ bool UVector::operator==(const UVector& other) const {
return true;
}
// TODO: delete this function once all call sites have been migrated to the
// new addElement().
//
// Appends obj at the end of the vector, using ensureCapacityX() to make room
// for one more element. If room cannot be made, nothing is stored and the
// element count is left unchanged.
void UVector::addElementX(void* obj, UErrorCode &status) {
    if (!ensureCapacityX(count + 1, status)) {
        return;
    }
    elements[count].pointer = obj;
    ++count;
}
void UVector::addElement(void* obj, UErrorCode &status) {
U_ASSERT(deleter == nullptr);
if (ensureCapacity(count + 1, status)) {
@ -323,6 +331,38 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
return -1;
}
// Makes sure the element array can hold at least minimumCapacity entries.
//
// Growth policy: double the current capacity, or jump straight to
// minimumCapacity when doubling is not enough.
//
// Returns TRUE when the capacity is sufficient on exit. Returns FALSE and
// sets status to U_ILLEGAL_ARGUMENT_ERROR for a negative or too-large request,
// or to U_MEMORY_ALLOCATION_ERROR when reallocation fails; in every failure
// case the existing array and capacity are left as they were.
//
// Unlike ensureCapacity(), the incoming value of status is not inspected.
UBool UVector::ensureCapacityX(int32_t minimumCapacity, UErrorCode &status) {
    if (minimumCapacity < 0) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    if (minimumCapacity <= capacity) {
        // Already large enough; nothing to do.
        return TRUE;
    }
    if (capacity > (INT32_MAX - 1) / 2) {
        // Doubling would overflow int32_t.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    const int32_t doubled = capacity * 2;
    const int32_t grownCapacity = (doubled < minimumCapacity) ? minimumCapacity : doubled;
    if (grownCapacity > static_cast<int32_t>(INT32_MAX / sizeof(UElement))) {
        // The byte size of the array would overflow; keep the original contents.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    UElement *resized =
        static_cast<UElement *>(uprv_realloc(elements, sizeof(UElement) * grownCapacity));
    if (resized == nullptr) {
        // Reallocation failed; the old array is kept.
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    elements = resized;
    capacity = grownCapacity;
    return TRUE;
}
UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
if (U_FAILURE(status)) {
return false;
@ -356,7 +396,6 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) {
}
return true;
}
/**
* Change the size of this vector as follows: If newSize is smaller,
* then truncate the array, possibly deleting held elements for i >=

Просмотреть файл

@ -123,6 +123,12 @@ public:
// java.util.Vector API
//------------------------------------------------------------
/*
* Old version of addElement, with non-standard error handling.
* Will be removed once all uses have been switched to the new addElement().
*/
void addElementX(void* obj, UErrorCode &status);
/**
* Add an element at the end of the vector.
* For use only with vectors that do not adopt their elements, which is to say,
@ -191,6 +197,12 @@ public:
inline UBool isEmpty(void) const {return count == 0;}
/*
* Old version of ensureCapacity, with non-standard error handling.
* Will be removed once all uses have been switched to the new ensureCapacity().
*/
UBool ensureCapacityX(int32_t minimumCapacity, UErrorCode &status);
UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status);
/**

Просмотреть файл

@ -83,7 +83,7 @@ void UVector32::assign(const UVector32& other, UErrorCode &ec) {
}
bool UVector32::operator==(const UVector32& other) const {
bool UVector32::operator==(const UVector32& other) {
int32_t i;
if (count != other.count) return false;
for (i=0; i<count; ++i) {

Просмотреть файл

@ -86,12 +86,12 @@ public:
* equal if they are of the same size and all elements are equal,
* as compared using this object's comparer.
*/
bool operator==(const UVector32& other) const;
bool operator==(const UVector32& other);
/**
* Equivalent to !operator==()
*/
inline bool operator!=(const UVector32& other) const;
inline bool operator!=(const UVector32& other);
//------------------------------------------------------------
// java.util.Vector API
@ -268,7 +268,7 @@ inline int32_t UVector32::lastElementi(void) const {
return elementAti(count-1);
}
inline bool UVector32::operator!=(const UVector32& other) const {
inline bool UVector32::operator!=(const UVector32& other) {
return !operator==(other);
}

23
intl/icu/source/configure поставляемый
Просмотреть файл

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for ICU 71.1.
# Generated by GNU Autoconf 2.69 for ICU 70.1.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@ -582,8 +582,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='International Components for Unicode'
PACKAGE_VERSION='71.1'
PACKAGE_STRING='ICU 71.1'
PACKAGE_VERSION='70.1'
PACKAGE_STRING='ICU 70.1'
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
PACKAGE_URL='http://icu-project.org'
@ -1375,7 +1375,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ICU 71.1 to adapt to many kinds of systems.
\`configure' configures ICU 70.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1442,7 +1442,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ICU 71.1:";;
short | recursive ) echo "Configuration of ICU 70.1:";;
esac
cat <<\_ACEOF
@ -1580,7 +1580,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ICU configure 71.1
ICU configure 70.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@ -2326,7 +2326,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ICU $as_me 71.1, which was
It was created by ICU $as_me 70.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -6166,6 +6166,11 @@ $as_echo "$as_me: Adding CXXFLAGS option -std=c++11" >&6;}
else
CXXFLAGS="$OLD_CXXFLAGS"
fi
case "${host}" in
*-*-solaris*)
CXXFLAGS="$OLD_CXXFLAGS"
;;
esac
fi
fi
@ -8607,7 +8612,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ICU $as_me 71.1, which was
This file was extended by ICU $as_me 70.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -8661,7 +8666,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
ICU config.status 71.1
ICU config.status 70.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

Просмотреть файл

@ -527,6 +527,11 @@ if [[ "$GXX" = yes ]]; then
else
CXXFLAGS="$OLD_CXXFLAGS"
fi
case "${host}" in
*-*-solaris*)
CXXFLAGS="$OLD_CXXFLAGS"
;;
esac
fi
fi

Просмотреть файл

@ -3,5 +3,5 @@
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
{
"cldrVersion": "41"
"cldrVersion": "40"
}

Просмотреть файл

@ -23139,7 +23139,6 @@
ໂຢຮີມ
ໂຢະ
ຣ້ອຍ
ຣະຄັງ
ຣະມາ
ຣະເມດ
ຣັກກັດ

Просмотреть файл

@ -5,237 +5,7 @@ ja{
boundaries{
line:process(dependency){"line_normal.brk"}
line_loose:process(dependency){"line_loose_cj.brk"}
line_loose_phrase:process(dependency){"line_loose_phrase_cj.brk"}
line_normal:process(dependency){"line_normal_cj.brk"}
line_normal_phrase:process(dependency){"line_normal_phrase_cj.brk"}
line_phrase:process(dependency){"line_phrase_cj.brk"}
line_strict:process(dependency){"line_cj.brk"}
line_strict_phrase:process(dependency){"line_phrase_cj.brk"}
}
extensions{
"かい",
"かしら",
"から",
"かれい",
"かれつ",
"かれる",
"かれん",
"きり",
"くらい",
"ぐらい",
"けれど",
"けれども",
"こそ",
"さえ",
"しか",
"した",
"ずつ",
"せる",
"せん",
"たい",
"たがる",
"たく",
"たら",
"たり",
"たれ",
"たれる",
"だけ",
"だに",
"だの",
"だり",
"つつ",
"てる",
"です",
"でも",
"ところが",
"ところで",
"とも",
"ない",
"なか",
"ながら",
"なく",
"なし",
"なぞ",
"など",
"なら",
"なり",
"なれる",
"なんぞ",
"ねる",
"ので",
"のに",
"のみ",
"はれる",
"ばかり",
"へる",
"ほど",
"まい",
"まう",
"まし",
"ます",
"まっ",
"まで",
"まま",
"まれ",
"もん",
"やら",
"やれる",
"よう",
"より",
"らしい",
"られる",
"れる",
"ろう",
"わっ",
"わな",
"わら",
"わり",
"わる",
"われ",
"われと",
"われる",
"わん",
"えたい",
"えて",
"える",
"けた",
"けたい",
"ける",
"させる",
"そうだ",
"っきゃ",
"っきり",
"っけ",
"っす",
"ったらしい",
"っちゅう",
"って",
"っていう",
"ってか",
"ってな",
"っと",
"っぱなし",
"っぷり",
"っぽい",
"にあう",
"にあがる",
"にあたって",
"にあたり",
"にあたりまして",
"にあたります",
"にあたる",
"において",
"におきまして",
"における",
"にかけ",
"にかけて",
"にかけまして",
"にたいして",
"にたいしまして",
"にたいします",
"にたいする",
"について",
"につき",
"につきまして",
"につけ",
"につれ",
"につれて",
"にて",
"にとって",
"にとり",
"にとりまして",
"にまつわります",
"にまつわる",
"にもかかわらず",
"にゃ",
"によって",
"により",
"によりまして",
"によります",
"による",
"にわたって",
"にわたり",
"にわたりまして",
"にわたります",
"にわたる",
"に対し",
"に対して",
"に対しまして",
"に対します",
"に対する",
"に当たって",
"に当たり",
"に当たりまして",
"に当たります",
"に当たる",
"に従い",
"に従いまして",
"に従います",
"に従う",
"に従って",
"に関し",
"に関して",
"に関しまして",
"に関します",
"に関する",
"に際し",
"に際して",
"ものの",
"ろうし",
"ろうと",
"われと",
"をの",
"をめぐって",
"をめぐりまして",
"をめぐります",
"をめぐる",
"をもちまして",
"をもって",
"を以て",
"を通して",
"を通しまして",
"を通じ",
"を通じて",
"を通じまして",
"んじゃ",
"んで",
"々宮",
"々家",
"え目",
"が丘",
"が台",
"が床",
"が浜",
"の内",
"の山公園",
"の峰",
"の森",
"の沢",
"の通り",
"の里",
"ヵ国",
"ヵ年",
"ヵ所",
"ヵ月",
"ヵ村",
"ヵ条",
"ヶ丘",
"ヶ国",
"ヶ島",
"ヶ年",
"ヶ所",
"ヶ月",
"ヶ村",
"ヶ条",
"ージ",
"ーズ",
"ータ",
"ード",
"ーニャ",
"ープランス",
"ーユ",
"ーランド",
"ーリンズ",
"ーン",
}
}

Просмотреть файл

@ -2,7 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
root{
Version{"41"}
Version{"40"}
boundaries{
grapheme:process(dependency){"char.brk"}
line:process(dependency){"line.brk"}

Просмотреть файл

@ -17,8 +17,7 @@
# line-break=loose (BCP47 -u-lb-loose) as defined for Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
# * between ID and hyphens 2010 & 2013 (both BA)
# * before 301C, 30A0 (both NS)
# * before hyphens 2010 & 2013 (both BA) and 301C, 30A0 (both NS)
# * before iteration marks 3005, 303B, 309D, 309E, 30FD, 30FE (all NS)
# * between characters of LineBreak class IN such as 2026
# * before some centered punct 203C, 2047, 2048, 2049, 30FB, FF1A, FF1B,
@ -136,7 +135,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
# AL_FOLLOW set of chars that can unconditionally follow an AL
# Needed in rules where stand-alone $CM s are treated as AL.
#
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $POX $ALPlus];
#
@ -239,7 +238,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL
# See issue ICU-20303
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $BAX $HY $NS $ALPlus $HL $IN];
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
$SP $IS / [^ $CanFollowIS $NU $CM];
$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];
@ -295,13 +294,11 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# LB 21 x (BA | HY | NS)
# BB x
#
# DO allow breaks here before $NSX, so don't include it.
# And DO allow breaks between ID and $BAX, so split out the handling of ID and do not include $BAX for them.
[$LB20NonBreaks - $ID] $CM* ($BA | $BAX | $HY | $NS);
$ID $CM* ($BA | $HY | $NS);
# DO allow breaks here before $BAX and $NSX, so don't include them
$LB20NonBreaks $CM* ($BA | $HY | $NS);
^$CM+ ($BA | $BAX | $HY | $NS);
^$CM+ ($BA | $HY | $NS);
$BB $CM* [^$CB]; # $BB x
$BB $CM* $LB20NonBreaks;
@ -377,9 +374,9 @@ $CP30 $CM* ($ALPlus | $HL | $NU);
# LB 30a Do not break between regional indicators. Break after pairs of them.
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $ZWJ {eof}];
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.

Просмотреть файл

@ -1,406 +0,0 @@
# Copyright (C) 2022 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#
# file: line_loose_phrase_cj.txt
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Unicode Standard Annex #14 (https://www.unicode.org/reports/tr14/)
# for Unicode 14.0, with the following modification:
#
# Boundaries between hyphens and following letters are suppressed when
# there is a boundary preceding the hyphen. See rule 20.9
#
# This tailors the line break behavior to correspond to CSS
# line-break=loose (BCP47 -u-lb-loose) as defined for Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
# * between ID and hyphens 2010 & 2013 (both BA)
# * before 301C, 30A0 (both NS)
# * before iteration marks 3005, 303B, 309D, 309E, 30FD, 30FE (all NS)
# * between characters of LineBreak class IN such as 2026
# * before some centered punct 203C, 2047, 2048, 2049, 30FB, FF1A, FF1B,
# FF65 (all NS) and FF01, FF1F (both EX).
# * before suffix characters with LineBreak class PO and EastAsianWidth A,F,W;
# this includes: 00B0 2030 2032 2033 2035 2103 2109 FE6A FF05 FFE0
# * after prefix characters with LineBreak class PR and EastAsianWidth A,F,W;
# this includes: 00A4 00B1 20AC 2116 FE69 FF04 FFE1 FFE5 FFE6
# It allows breaking before 201C and after 201D, for zh_Hans, zh_Hant, and ja.
#
# The content is the same as line_loose_cj.txt except the following
# 1. Add CJK into dictionary.
# 2. Add East Asian Width with class F, W and H into $ALPlus.
#
# Character Classes defined by TR 14.
#
!!chain;
!!quoted_literals_only;
$AI = [:LineBreak = Ambiguous:];
$AL = [:LineBreak = Alphabetic:];
$BAX = [\u2010 \u2013];
$BA = [[:LineBreak = Break_After:] - $BAX];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
$B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [[:LineBreak = Close_Punctuation:] \u201d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EXX = [\uFF01 \uFF1F];
$EX = [[:LineBreak = Exclamation:] - $EXX];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
# CSS Loose tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
$JV = [:LineBreak = JV:];
$JT = [:LineBreak = JT:];
$LF = [:LineBreak = Line_Feed:];
$NL = [:LineBreak = Next_Line:];
$NSX = [\u301C \u30A0 \u3005 \u303B \u309D \u309E \u30FD \u30FE \u203C \u2047 \u2048 \u2049 \u30FB \uFF1A \uFF1B \uFF65];
$NS = [[:LineBreak = Nonstarter:] - $NSX];
$NU = [:LineBreak = Numeric:];
$OP = [[:LineBreak = Open_Punctuation:] \u201c];
$POX = [\u00B0 \u2030 \u2032 \u2033 \u2035 \u2103 \u2109 \uFE6A \uFF05 \uFFE0];
$PO = [[:LineBreak = Postfix_Numeric:] - $POX];
$PRX = [\u00A4 \u00B1 \u20AC \u2116 \uFE69 \uFF04 \uFFE1 \uFFE5 \uFFE6];
$PR = [[:LineBreak = Prefix_Numeric:] - $PRX];
$QU = [[:LineBreak = Quotation:] - [\u201c\u201d]];
$RI = [:LineBreak = Regional_Indicator:];
$SA = [:LineBreak = Complex_Context:];
$SG = [:LineBreak = Surrogate:];
$SP = [:LineBreak = Space:];
$SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [:LineBreak = ZWJ:];
# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
# without a formal name. Because ICU rules require multiple uses of the expressions,
# give them a single definition with a name
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
$CMX = [[$CM] - [$ZWJ]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context (SA) and $dictionaryCJK.
# Add CJK dictionary
$Han = [:Han:];
$Katakana = [:Katakana:];
$Hiragana = [:Hiragana:];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana \u30fc];
$dictionaryCJK = [$KanaKanji $HangulSyllable];
$dictionary = [$ComplexContext $dictionaryCJK];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
# Let fullwidth-ASCII digits and letters be part of words.
$FW_alphanum = [\uff10-\uff19\uff21-\uff3a\uff41-\uff5a];
$ALPlus = [$AL $AI $SG $XX $FW_alphanum [$dictionary-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
#
# CAN_CM is the set of characters that may combine with CM combining chars.
# Note that Linebreak UAX 14's concept of a combining char and the rules
# for what they can combine with are _very_ different from the rest of Unicode.
#
# Note that $CM itself is left out of this set. If CM is needed as a base
# it must be listed separately in the rule.
#
$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM]; # Bases that can take CMs
$CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
#
# AL_FOLLOW set of chars that can unconditionally follow an AL
# Needed in rules where stand-alone $CM s are treated as AL.
#
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $BAX $HY $NS $IN $NU $PR $PO $POX $ALPlus];
#
# Rule LB 4, 5 Mandatory (Hard) breaks.
#
$LB4Breaks = [$BK $CR $LF $NL];
$LB4NonBreaks = [^$BK $CR $LF $NL $CM];
$CR $LF {100};
#
# LB 6 Do not break before hard line breaks.
#
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks.
$CAN_CM $CM* $LB4Breaks {100};
^$CM+ $LB4Breaks {100};
# LB 7 x SP
# x ZW
$LB4NonBreaks [$SP $ZW];
$CAN_CM $CM* [$SP $ZW];
^$CM+ [$SP $ZW];
#
# LB 8 Break after zero width space
# ZW SP* ÷
#
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
$ZW $SP* / [^$SP $ZW $LB4Breaks];
# LB 8a ZWJ x Do not break Emoji ZWJ sequences.
#
$ZWJ [^$CM];
# LB 9 Combining marks. X $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
# $CM not covered by the above needs to behave like $AL
# See definition of $CAN_CM.
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules.
^$CM+;
#
# LB 11 Do not break before or after WORD JOINER & related characters.
#
$CAN_CM $CM* $WJ;
$LB8NonBreaks $WJ;
^$CM+ $WJ;
$WJ $CM* .;
#
# LB 12 Do not break after NBSP and related characters.
# GL x
#
$GL $CM* .;
#
# LB 12a Do not break before NBSP and related characters ...
# [^SP BA HY] x GL
#
[[$LB8NonBreaks] - [$SP $BA $BAX $HY]] $CM* $GL;
^$CM+ $GL;
# LB 13 Don't break before ']' or '!' or '/', even after spaces.
#
# Do not include $EXX here
$LB8NonBreaks $CL;
$CAN_CM $CM* $CL;
^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $CP;
$CAN_CM $CM* $CP;
^$CM+ $CP; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $EX;
$CAN_CM $CM* $EX;
^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $SY;
$CAN_CM $CM* $SY;
^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL
#
# LB 14 Do not break after OP, even after spaces
# Note subtle interaction with "SP IS /" rules in LB14a.
# This rule consumes the SP, chaining happens on the IS, effectively overriding the SP IS rules,
# which is the desired behavior.
#
$OP $CM* $SP* .;
$OP $CM* $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL
# by rule 8, CM following a SP is stand-alone.
# LB 14a Force a break before start of a number with a leading decimal pt, e.g. " .23"
# Note: would be simpler to express as "$SP / $IS $CM* $NU;", but ICU rules have limitations.
# See issue ICU-20303
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $BAX $HY $NS $ALPlus $HL $IN];
$SP $IS / [^ $CanFollowIS $NU $CM];
$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];
#
# LB 14b Do not break before numeric separators (IS), even after spaces.
[$LB8NonBreaks - $SP] $IS;
$SP $IS $CM* [$CanFollowIS {eof}];
$SP $IS $CM* $ZWJ [^$CM $NU];
$CAN_CM $CM* $IS;
^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL
# LB 15
$QU $CM* $SP* $OP;
# LB 16
# Do not break between closing punctuation and $NS, even with intervening spaces
# But DO allow a break between closing punctuation and $NSX, don't include it here
($CL | $CP) $CM* $SP* $NS;
# LB 17
$B2 $CM* $SP* $B2;
#
# LB 18 Break after spaces.
#
$LB18NonBreaks = [$LB8NonBreaks - [$SP]];
$LB18Breaks = [$LB8Breaks $SP];
# LB 19
# x QU
$LB18NonBreaks $CM* $QU;
^$CM+ $QU;
# QU x
$QU $CM* .;
# LB 20
# <break> $CB
# $CB <break>
#
$LB20NonBreaks = [$LB18NonBreaks - $CB];
# LB 20.09 Don't break between Hyphens and Letters when there is a break preceding the hyphen.
# Originally added as a Finnish tailoring, now promoted to default ICU behavior.
# Note: this is not default UAX-14 behaviour. See issue ICU-8151.
#
^($HY | $HH) $CM* $ALPlus;
# LB 21 x (BA | HY | NS)
# BB x
#
# DO allow breaks here before $NSX, so don't include it.
# And DO allow breaks between ID and $BAX, so split out the handling of ID and do not include $BAX for them.
[$LB20NonBreaks - $ID] $CM* ($BA | $BAX | $HY | $NS);
$ID $CM* ($BA | $HY | $NS);
^$CM+ ($BA | $BAX | $HY | $NS);
$BB $CM* [^$CB]; # $BB x
$BB $CM* $LB20NonBreaks;
# LB 21a Don't break after Hebrew + Hyphen
# HL (HY | BA) x
#
$HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)
$SY $CM* $HL;
# LB 22 Do not break before ellipses
#
[$LB20NonBreaks - $IN] $CM* $IN; # line_loose tailoring
^$CM+ $IN;
# LB 23
#
($ALPlus | $HL) $CM* $NU;
^$CM+ $NU; # Rule 10, any otherwise unattached CM behaves as AL
$NU $CM* ($ALPlus | $HL);
# LB 23a
# Do not include $POX here
#
$PR $CM* ($ID | $EB | $EM);
($ID | $EB | $EM) $CM* $PO;
#
# LB 24
#
# Do not include $PRX here
($PR | $PO | $POX) $CM* ($ALPlus | $HL);
($ALPlus | $HL) $CM* ($PR | $PO | $POX); # TODO: should this be ($PR | $PRX | $PO)
^$CM+ ($PR | $PO | $POX); # Rule 10, any otherwise unattached CM behaves as AL
#
# LB 25 Numbers.
#
# Here do not include $PRX at the beginning or $POX at the end
(($PR | $PO | $POX) $CM*)? (($OP | $HY) $CM*)? ($IS $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PRX | $PO))?;
# LB 26 Do not break a Korean syllable
#
$JL $CM* ($JL | $JV | $H2 | $H3);
($JV | $H2) $CM* ($JV | $JT);
($JT | $H3) $CM* $JT;
# LB 27 Treat Korean Syllable Block the same as ID (don't break it)
# Do not include $POX or $PRX here
($JL | $JV | $JT | $H2 | $H3) $CM* $PO;
$PR $CM* ($JL | $JV | $JT | $H2 | $H3);
# LB 28 Do not break between alphabetics
#
($ALPlus | $HL) $CM* ($ALPlus | $HL);
^$CM+ ($ALPlus | $HL); # The $CM+ is from rule 10, an unattached CM is treated as AL
# LB 29
$IS $CM* ($ALPlus | $HL);
# LB 30
($ALPlus | $HL | $NU) $CM* $OP30;
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL.
$CP30 $CM* ($ALPlus | $HL | $NU);
# LB 30a Do not break between regional indicators. Break after pairs of them.
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $BAX $HY $NS $IN $ZWJ {eof}];
# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;

Просмотреть файл

@ -17,7 +17,7 @@
# line-break=normal (BCP47 -u-lb-normal) as defined for Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
# * before 301C, 30A0 (both NS)
# * before hyphens 2010 & 2013 (both BA) and 301C, 30A0 (both NS)
# It allows breaking before 201C and after 201D, for zh_Hans, zh_Hant, and ja.
#
@ -29,7 +29,8 @@
$AI = [:LineBreak = Ambiguous:];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$BAX = [\u2010 \u2013];
$BA = [[:LineBreak = Break_After:] - $BAX];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
@ -183,7 +184,7 @@ $GL $CM* .;
# LB 12a Do not break before NBSP and related characters ...
# [^SP BA HY] x GL
#
[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GL;
[[$LB8NonBreaks] - [$SP $BA $BAX $HY]] $CM* $GL;
^$CM+ $GL;
@ -281,7 +282,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
# LB 21 x (BA | HY | NS)
# BB x
#
# DO allow breaks here before $NSX, so don't include it
# DO allow breaks here before $BAX and $NSX, so don't include them
$LB20NonBreaks $CM* ($BA | $HY | $NS);
@ -293,7 +294,7 @@ $BB $CM* $LB20NonBreaks;
# LB 21a Don't break after Hebrew + Hyphen
# HL (HY | BA) x
#
$HL $CM* ($HY | $BA) $CM* [^$CB]?;
$HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)

Просмотреть файл

@ -1,385 +0,0 @@
# Copyright (C) 2022 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#
# file: line_normal_phrase_cj.txt
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Unicode Standard Annex #14 (https://www.unicode.org/reports/tr14/)
# for Unicode 14.0, with the following modification:
#
# Boundaries between hyphens and following letters are suppressed when
# there is a boundary preceding the hyphen. See rule 20.9
#
# This tailors the line break behavior to correspond to CSS
# line-break=normal (BCP47 -u-lb-normal) as defined for Chinese & Japanese.
# It sets characters of class CJ to behave like ID.
# In addition, it allows breaks:
# * before 301C, 30A0 (both NS)
# It allows breaking before 201C and after 201D, for zh_Hans, zh_Hant, and ja.
#
# The content is the same as line_normal_cj.txt except the following
# 1. Add CJK into dictionary.
# 2. Add East Asian Width with class F, W and H into $ALPlus.
#
# Character Classes defined by TR 14.
#
!!chain;
!!quoted_literals_only;
$AI = [:LineBreak = Ambiguous:];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
$B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [[:LineBreak = Close_Punctuation:] \u201d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
# CSS Normal tailoring: CJ resolves to ID
$ID = [[:LineBreak = Ideographic:] $CJ];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
$JV = [:LineBreak = JV:];
$JT = [:LineBreak = JT:];
$LF = [:LineBreak = Line_Feed:];
$NL = [:LineBreak = Next_Line:];
$NSX = [\u301C \u30A0];
$NS = [[:LineBreak = Nonstarter:] - $NSX];
$NU = [:LineBreak = Numeric:];
$OP = [[:LineBreak = Open_Punctuation:] \u201c];
$PO = [:LineBreak = Postfix_Numeric:];
$PR = [:LineBreak = Prefix_Numeric:];
$QU = [[:LineBreak = Quotation:] - [\u201c\u201d]];
$RI = [:LineBreak = Regional_Indicator:];
$SA = [:LineBreak = Complex_Context:];
$SG = [:LineBreak = Surrogate:];
$SP = [:LineBreak = Space:];
$SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [:LineBreak = ZWJ:];
# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
# without a formal name. Because ICU rules require multiple uses of the expressions,
# give them a single definition with a name
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
$CMX = [[$CM] - [$ZWJ]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context (SA) and $dictionaryCJK.
# Add CJK dictionary
$Han = [:Han:];
$Katakana = [:Katakana:];
$Hiragana = [:Hiragana:];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana \u30fc];
$dictionaryCJK = [$KanaKanji $HangulSyllable];
$dictionary = [$ComplexContext $dictionaryCJK];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
# Let fullwidth-ASCII digits and letters be part of words.
$FW_alphanum = [\uff10-\uff19\uff21-\uff3a\uff41-\uff5a];
$ALPlus = [$AL $AI $SG $XX $FW_alphanum [$dictionary-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
#
# CAN_CM is the set of characters that may combine with CM combining chars.
# Note that Linebreak UAX 14's concept of a combining char and the rules
# for what they can combine with are _very_ different from the rest of Unicode.
#
# Note that $CM itself is left out of this set. If CM is needed as a base
# it must be listed separately in the rule.
#
$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM]; # Bases that can take CMs
$CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
#
# AL_FOLLOW set of chars that can unconditionally follow an AL
# Needed in rules where stand-alone $CM s are treated as AL.
#
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
#
# Rule LB 4, 5 Mandatory (Hard) breaks.
#
$LB4Breaks = [$BK $CR $LF $NL];
$LB4NonBreaks = [^$BK $CR $LF $NL $CM];
$CR $LF {100};
#
# LB 6 Do not break before hard line breaks.
#
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks.
$CAN_CM $CM* $LB4Breaks {100};
^$CM+ $LB4Breaks {100};
# LB 7 x SP
# x ZW
$LB4NonBreaks [$SP $ZW];
$CAN_CM $CM* [$SP $ZW];
^$CM+ [$SP $ZW];
#
# LB 8 Break after zero width space
# ZW SP* ÷
#
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
$ZW $SP* / [^$SP $ZW $LB4Breaks];
# LB 8a ZWJ x Do not break Emoji ZWJ sequences.
#
$ZWJ [^$CM];
# LB 9 Combining marks. X $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
# $CM not covered by the above needs to behave like $AL
# See definition of $CAN_CM.
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules.
^$CM+;
#
# LB 11 Do not break before or after WORD JOINER & related characters.
#
$CAN_CM $CM* $WJ;
$LB8NonBreaks $WJ;
^$CM+ $WJ;
$WJ $CM* .;
#
# LB 12 Do not break after NBSP and related characters.
# GL x
#
$GL $CM* .;
#
# LB 12a Do not break before NBSP and related characters ...
# [^SP BA HY] x GL
#
[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GL;
^$CM+ $GL;
# LB 13 Don't break before ']' or '!' or '/', even after spaces.
#
$LB8NonBreaks $CL;
$CAN_CM $CM* $CL;
^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $CP;
$CAN_CM $CM* $CP;
^$CM+ $CP; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $EX;
$CAN_CM $CM* $EX;
^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $SY;
$CAN_CM $CM* $SY;
^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL
#
# LB 14 Do not break after OP, even after spaces
# Note subtle interaction with "SP IS /" rules in LB14a.
# This rule consumes the SP, chaining happens on the IS, effectively overriding the SP IS rules,
# which is the desired behavior.
#
$OP $CM* $SP* .;
$OP $CM* $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL
# by rule 8, CM following a SP is stand-alone.
# LB 14a Force a break before start of a number with a leading decimal pt, e.g. " .23"
# Note: would be simpler to express as "$SP / $IS $CM* $NU;", but ICU rules have limitations.
# See issue ICU-20303
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
$SP $IS / [^ $CanFollowIS $NU $CM];
$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];
#
# LB 14b Do not break before numeric separators (IS), even after spaces.
[$LB8NonBreaks - $SP] $IS;
$SP $IS $CM* [$CanFollowIS {eof}];
$SP $IS $CM* $ZWJ [^$CM $NU];
$CAN_CM $CM* $IS;
^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL
# LB 15
$QU $CM* $SP* $OP;
# LB 16
# Do not break between closing punctuation and $NS, even with intervening spaces
# But DO allow a break between closing punctuation and $NSX, don't include it here
($CL | $CP) $CM* $SP* $NS;
# LB 17
$B2 $CM* $SP* $B2;
#
# LB 18 Break after spaces.
#
$LB18NonBreaks = [$LB8NonBreaks - [$SP]];
$LB18Breaks = [$LB8Breaks $SP];
# LB 19
# x QU
$LB18NonBreaks $CM* $QU;
^$CM+ $QU;
# QU x
$QU $CM* .;
# LB 20
# <break> $CB
# $CB <break>
#
$LB20NonBreaks = [$LB18NonBreaks - $CB];
# LB 20.09 Don't break between Hyphens and Letters when there is a break preceding the hyphen.
# Originally added as a Finnish tailoring, now promoted to default ICU behavior.
# Note: this is not default UAX-14 behaviour. See issue ICU-8151.
#
^($HY | $HH) $CM* $ALPlus;
# LB 21 x (BA | HY | NS)
# BB x
#
# DO allow breaks here before $NSX, so don't include it
$LB20NonBreaks $CM* ($BA | $HY | $NS);
^$CM+ ($BA | $HY | $NS);
$BB $CM* [^$CB]; # $BB x
$BB $CM* $LB20NonBreaks;
# LB 21a Don't break after Hebrew + Hyphen
# HL (HY | BA) x
#
$HL $CM* ($HY | $BA) $CM* [^$CB]?;
# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)
$SY $CM* $HL;
# LB 22 Do not break before ellipses
#
$LB20NonBreaks $CM* $IN;
^$CM+ $IN;
# LB 23
#
($ALPlus | $HL) $CM* $NU;
^$CM+ $NU; # Rule 10, any otherwise unattached CM behaves as AL
$NU $CM* ($ALPlus | $HL);
# LB 23a
#
$PR $CM* ($ID | $EB | $EM);
($ID | $EB | $EM) $CM* $PO;
#
# LB 24
#
($PR | $PO) $CM* ($ALPlus | $HL);
($ALPlus | $HL) $CM* ($PR | $PO);
^$CM+ ($PR | $PO); # Rule 10, any otherwise unattached CM behaves as AL
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? ($IS $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable
#
$JL $CM* ($JL | $JV | $H2 | $H3);
($JV | $H2) $CM* ($JV | $JT);
($JT | $H3) $CM* $JT;
# LB 27 Treat Korean syllable block the same as ID (don't break it)
($JL | $JV | $JT | $H2 | $H3) $CM* $PO;
$PR $CM* ($JL | $JV | $JT | $H2 | $H3);
# LB 28 Do not break between alphabetics
#
($ALPlus | $HL) $CM* ($ALPlus | $HL);
^$CM+ ($ALPlus | $HL); # The $CM+ is from rule 10, an unattached CM is treated as AL
# LB 29
$IS $CM* ($ALPlus | $HL);
# LB 30
($ALPlus | $HL | $NU) $CM* $OP30;
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL.
$CP30 $CM* ($ALPlus | $HL | $NU);
# LB 30a Do not break between regional indicators. Break after pairs of them.
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;

Просмотреть файл

@ -1,377 +0,0 @@
# Copyright (C) 2022 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#
# file: line_phrase_cj.txt
#
# Line Breaking Rules
# Implement default line breaking as defined by
# Unicode Standard Annex #14 (https://www.unicode.org/reports/tr14/)
# for Unicode 14.0, with the following modification:
#
# Boundaries between hyphens and following letters are suppressed when
# there is a boundary preceding the hyphen. See rule 20.9
#
# This corresponds to CSS line-break-word-handling=phrase (BCP47 -u-lw-phrase).
# It sets characters of class CJ to behave like NS.
# It allows breaking before 201C and after 201D, for zh_Hans, zh_Hant, and ja.
#
# The content is the same as line_cj.txt except the following
# 1. Add CJK into dictionary.
# 2. Add East Asian Width with class F, W and H into $ALPlus.
#
# Character Classes defined by TR 14.
#
!!chain;
!!quoted_literals_only;
$AI = [:LineBreak = Ambiguous:];
$AL = [:LineBreak = Alphabetic:];
$BA = [:LineBreak = Break_After:];
$HH = [\u2010]; # \u2010 is HYPHEN, default line break is BA.
$BB = [:LineBreak = Break_Before:];
$BK = [:LineBreak = Mandatory_Break:];
$B2 = [:LineBreak = Break_Both:];
$CB = [:LineBreak = Contingent_Break:];
$CJ = [:LineBreak = Conditional_Japanese_Starter:];
$CL = [[:LineBreak = Close_Punctuation:] \u201d];
# $CM = [:LineBreak = Combining_Mark:];
$CP = [:LineBreak = Close_Parenthesis:];
$CR = [:LineBreak = Carriage_Return:];
$EB = [:LineBreak = EB:];
$EM = [:LineBreak = EM:];
$EX = [:LineBreak = Exclamation:];
$GL = [:LineBreak = Glue:];
$HL = [:LineBreak = Hebrew_Letter:];
$HY = [:LineBreak = Hyphen:];
$H2 = [:LineBreak = H2:];
$H3 = [:LineBreak = H3:];
$ID = [:LineBreak = Ideographic:];
$IN = [:LineBreak = Inseperable:];
$IS = [:LineBreak = Infix_Numeric:];
$JL = [:LineBreak = JL:];
$JV = [:LineBreak = JV:];
$JT = [:LineBreak = JT:];
$LF = [:LineBreak = Line_Feed:];
$NL = [:LineBreak = Next_Line:];
# NS includes CJ for CSS strict line breaking.
$NS = [[:LineBreak = Nonstarter:] $CJ];
$NU = [:LineBreak = Numeric:];
$OP = [[:LineBreak = Open_Punctuation:] \u201c];
$PO = [:LineBreak = Postfix_Numeric:];
$PR = [:LineBreak = Prefix_Numeric:];
$QU = [[:LineBreak = Quotation:] - [\u201c\u201d]];
$RI = [:LineBreak = Regional_Indicator:];
$SA = [:LineBreak = Complex_Context:];
$SG = [:LineBreak = Surrogate:];
$SP = [:LineBreak = Space:];
$SY = [:LineBreak = Break_Symbols:];
$WJ = [:LineBreak = Word_Joiner:];
$XX = [:LineBreak = Unknown:];
$ZW = [:LineBreak = ZWSpace:];
$ZWJ = [:LineBreak = ZWJ:];
# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 from UAX 14,
# without a formal name. Because ICU rules require multiple uses of the expressions,
# give them a single definition with a name
$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
$ExtPictUnassigned = [\p{Extended_Pictographic} & \p{Cn}];
# By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
# list it in the numerous rules that use CM.
# By LB1, SA characters with general category of Mn or Mc also resolve to CM.
$CM = [[:LineBreak = Combining_Mark:] $ZWJ [$SA & [[:Mn:][:Mc:]]]];
$CMX = [[$CM] - [$ZWJ]];
# Dictionary character set, for triggering language-based break engines. Currently
# limited to LineBreak=Complex_Context (SA) and $dictionaryCJK.
# Add CJK dictionary
$Han = [:Han:];
$Katakana = [:Katakana:];
$Hiragana = [:Hiragana:];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana \u30fc];
$dictionaryCJK = [$KanaKanji $HangulSyllable];
$dictionary = [$ComplexContext $dictionaryCJK];
#
# Rule LB1. By default, treat AI (characters with ambiguous east Asian width),
# SA (Dictionary chars, excluding Mn and Mc)
# SG (Unpaired Surrogates)
# XX (Unknown, unassigned)
# as $AL (Alphabetic)
#
# Let fullwidth-ASCII digits and letters be part of words.
$FW_alphanum = [\uff10-\uff19\uff21-\uff3a\uff41-\uff5a];
$ALPlus = [$AL $AI $SG $XX $FW_alphanum [$dictionary-[[:Mn:][:Mc:]]]];
## -------------------------------------------------
#
# CAN_CM is the set of characters that may combine with CM combining chars.
# Note that Linebreak UAX 14's concept of a combining char and the rules
# for what they can combine with are _very_ different from the rest of Unicode.
#
# Note that $CM itself is left out of this set. If CM is needed as a base
# it must be listed separately in the rule.
#
$CAN_CM = [^$SP $BK $CR $LF $NL $ZW $CM]; # Bases that can take CMs
$CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM]; # Bases that can't take CMs
#
# AL_FOLLOW set of chars that can unconditionally follow an AL
# Needed in rules where stand-alone $CM s are treated as AL.
#
$AL_FOLLOW = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
#
# Rule LB 4, 5 Mandatory (Hard) breaks.
#
$LB4Breaks = [$BK $CR $LF $NL];
$LB4NonBreaks = [^$BK $CR $LF $NL $CM];
$CR $LF {100};
#
# LB 6 Do not break before hard line breaks.
#
$LB4NonBreaks? $LB4Breaks {100}; # LB 5 do not break before hard breaks.
$CAN_CM $CM* $LB4Breaks {100};
^$CM+ $LB4Breaks {100};
# LB 7 x SP
# x ZW
$LB4NonBreaks [$SP $ZW];
$CAN_CM $CM* [$SP $ZW];
^$CM+ [$SP $ZW];
#
# LB 8 Break after zero width space
# ZW SP* ÷
#
$LB8Breaks = [$LB4Breaks $ZW];
$LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
$ZW $SP* / [^$SP $ZW $LB4Breaks];
# LB 8a ZWJ x Do not break Emoji ZWJ sequences.
#
$ZWJ [^$CM];
# LB 9 Combining marks. X $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
# $CM not covered by the above needs to behave like $AL
# See definition of $CAN_CM.
$CAN_CM $CM+; # Stick together any combining sequences that don't match other rules.
^$CM+;
#
# LB 11 Do not break before or after WORD JOINER & related characters.
#
$CAN_CM $CM* $WJ;
$LB8NonBreaks $WJ;
^$CM+ $WJ;
$WJ $CM* .;
#
# LB 12 Do not break after NBSP and related characters.
# GL x
#
$GL $CM* .;
#
# LB 12a Do not break before NBSP and related characters ...
# [^SP BA HY] x GL
#
[[$LB8NonBreaks] - [$SP $BA $HY]] $CM* $GL;
^$CM+ $GL;
# LB 13 Don't break before ']' or '!' or '/', even after spaces.
#
$LB8NonBreaks $CL;
$CAN_CM $CM* $CL;
^$CM+ $CL; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $CP;
$CAN_CM $CM* $CP;
^$CM+ $CP; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $EX;
$CAN_CM $CM* $EX;
^$CM+ $EX; # by rule 10, stand-alone CM behaves as AL
$LB8NonBreaks $SY;
$CAN_CM $CM* $SY;
^$CM+ $SY; # by rule 10, stand-alone CM behaves as AL
#
# LB 14 Do not break after OP, even after spaces
# Note subtle interaction with "SP IS /" rules in LB14a.
# This rule consumes the SP, chaining happens on the IS, effectively overriding the SP IS rules,
# which is the desired behavior.
#
$OP $CM* $SP* .;
$OP $CM* $SP+ $CM+ $AL_FOLLOW?; # by rule 10, stand-alone CM behaves as AL
# by rule 8, CM following a SP is stand-alone.
# LB 14a Force a break before start of a number with a leading decimal pt, e.g. " .23"
# Note: would be simpler to express as "$SP / $IS $CM* $NU;", but ICU rules have limitations.
# See issue ICU-20303
$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
$SP $IS / [^ $CanFollowIS $NU $CM];
$SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];
#
# LB 14b Do not break before numeric separators (IS), even after spaces.
[$LB8NonBreaks - $SP] $IS;
$SP $IS $CM* [$CanFollowIS {eof}];
$SP $IS $CM* $ZWJ [^$CM $NU];
$CAN_CM $CM* $IS;
^$CM+ $IS; # by rule 10, stand-alone CM behaves as AL
# LB 15
$QU $CM* $SP* $OP;
# LB 16
($CL | $CP) $CM* $SP* $NS;
# LB 17
$B2 $CM* $SP* $B2;
#
# LB 18 Break after spaces.
#
$LB18NonBreaks = [$LB8NonBreaks - [$SP]];
$LB18Breaks = [$LB8Breaks $SP];
# LB 19
# x QU
$LB18NonBreaks $CM* $QU;
^$CM+ $QU;
# QU x
$QU $CM* .;
# LB 20
# <break> $CB
# $CB <break>
#
$LB20NonBreaks = [$LB18NonBreaks - $CB];
# LB 20.09 Don't break between Hyphens and Letters when there is a break preceding the hyphen.
# Originally added as a Finnish tailoring, now promoted to default ICU behavior.
# Note: this is not default UAX-14 behaviour. See issue ICU-8151.
#
^($HY | $HH) $CM* $ALPlus;
# LB 21 x (BA | HY | NS)
# BB x
#
$LB20NonBreaks $CM* ($BA | $HY | $NS);
^$CM+ ($BA | $HY | $NS);
$BB $CM* [^$CB]; # $BB x
$BB $CM* $LB20NonBreaks;
# LB 21a Don't break after Hebrew + Hyphen
# HL (HY | BA) x
#
$HL $CM* ($HY | $BA) $CM* [^$CB]?;
# LB 21b (forward) Don't break between SY and HL
# (break between HL and SY already disallowed by LB 13 above)
$SY $CM* $HL;
# LB 22 Do not break before ellipses
#
$LB20NonBreaks $CM* $IN;
^$CM+ $IN;
# LB 23
#
($ALPlus | $HL) $CM* $NU;
^$CM+ $NU; # Rule 10, any otherwise unattached CM behaves as AL
$NU $CM* ($ALPlus | $HL);
# LB 23a
#
$PR $CM* ($ID | $EB | $EM);
($ID | $EB | $EM) $CM* $PO;
#
# LB 24
#
($PR | $PO) $CM* ($ALPlus | $HL);
($ALPlus | $HL) $CM* ($PR | $PO);
^$CM+ ($PR | $PO); # Rule 10, any otherwise unattached CM behaves as AL
#
# LB 25 Numbers.
#
(($PR | $PO) $CM*)? (($OP | $HY) $CM*)? ($IS $CM*)? $NU ($CM* ($NU | $SY | $IS))*
($CM* ($CL | $CP))? ($CM* ($PR | $PO))?;
# LB 26 Do not break a Korean syllable
#
$JL $CM* ($JL | $JV | $H2 | $H3);
($JV | $H2) $CM* ($JV | $JT);
($JT | $H3) $CM* $JT;
# LB 27 Treat Korean syllable block the same as ID (don't break it)
($JL | $JV | $JT | $H2 | $H3) $CM* $PO;
$PR $CM* ($JL | $JV | $JT | $H2 | $H3);
# LB 28 Do not break between alphabetics
#
($ALPlus | $HL) $CM* ($ALPlus | $HL);
^$CM+ ($ALPlus | $HL); # The $CM+ is from rule 10, an unattached CM is treated as AL
# LB 29
$IS $CM* ($ALPlus | $HL);
# LB 30
($ALPlus | $HL | $NU) $CM* $OP30;
^$CM+ $OP30; # The $CM+ is from rule 10, an unattached CM is treated as AL.
$CP30 $CM* ($ALPlus | $HL | $NU);
# LB 30a Do not break between regional indicators. Break after pairs of them.
# Tricky interaction with LB8a: ZWJ x . together with ZWJ acting like a CM.
$RI $CM* $RI / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
# because of the chain-out behavior difference. The rule must chain out only from the [set characters],
# not from the preceding $RI or $CM, which it would be able to do if the set were optional.
# LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
$EB $CM* $EM;
$ExtPictUnassigned $CM* $EM;
# LB 31 Break everywhere else.
# Match a single code point if no other rule applies.
.;

Просмотреть файл

@ -138,44 +138,7 @@ export ICU4C_DIR=$HOME/icu-myfork/icu4c
export ICU4J_ROOT=$HOME/icu-myfork/icu4j
export TOOLS_ROOT=$HOME/icu-myfork/tools
# 1d. Directory for logs/notes (create if does not exist)
export NOTES=...(some directory)...
mkdir -p $NOTES
# 2a. Configure ICU4C, build and test without new data first, to verify that
# there are no pre-existing errors. Here <platform> is the runConfigureICU
# code for the platform you are building, e.g. Linux, MacOSX, Cygwin.
# (optionally build with debug enabled)
cd $ICU4C_DIR/source
./runConfigureICU [--enable-debug] <platform>
make clean
make check 2>&1 | tee $NOTES/icu4c-oldData-makeCheck.txt
# 2b. Now with ICU4J, build and test without new data first, to verify that
# there are no pre-existing errors (or at least to have the pre-existing errors
# as a base for comparison):
cd $ICU4J_ROOT
ant clean
ant check 2>&1 | tee $NOTES/icu4j-oldData-antCheck.txt
# 3. Make pre-adjustments as necessary
# 3a. Copy latest relevant CLDR dtds to ICU
cp -p $CLDR_DIR/common/dtd/ldml.dtd $ICU4C_DIR/source/data/dtd/cldr/common/dtd/
cp -p $CLDR_DIR/common/dtd/ldmlICU.dtd $ICU4C_DIR/source/data/dtd/cldr/common/dtd/
# 3b. Update the cldr-icu tooling to use the latest tagged version of ICU
open $TOOLS_ROOT/cldr/cldr-to-icu/pom.xml
# search for icu4j-for-cldr and update to the latest tagged version per instructions
# 3c. Update the build for any new icu version, added locales, etc.
open $TOOLS_ROOT/cldr/cldr-to-icu/build-icu-data.xml
# update icuVersion, icuDataVersion if necessary
# update lists of locales to include if necessary
# 4. Build and install the CLDR jar
# 2. Build and install the CLDR jar
cd $TOOLS_ROOT/cldr
ant install-cldr-libs
@ -183,7 +146,16 @@ ant install-cldr-libs
See the $TOOLS_ROOT/cldr/lib/README.txt file for more information on the CLDR
jar and the install-cldr-jars.sh script.
# 5a. Generate the CLDR production data. This process uses ant with ICU's
# 3. Configure ICU4C, build and test without new data first, to verify that
# there are no pre-existing errors. Here <platform> is the runConfigureICU
# code for the platform you are building, e.g. Linux, MacOSX, Cygwin.
cd $ICU4C_DIR/source
./runConfigureICU <platform>
make clean
make check 2>&1 | tee /tmp/icu4c-oldData-makeCheck.txt
# 4a. Generate the CLDR production data. This process uses ant with ICU's
# data/build.xml
#
# Running "ant cleanprod" is necessary to clean out the production data directory
@ -195,9 +167,9 @@ jar and the install-cldr-jars.sh script.
cd $ICU4C_DIR/source/data
ant cleanprod
ant setup
ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt
ant proddata 2>&1 | tee /tmp/cldr-newData-proddataLog.txt
# 5b. Build the new ICU4C data files; these include .txt files and .py files.
# 4b. Build the new ICU4C data files; these include .txt files and .py files.
# These new files will replace whatever was already present in the ICU4C sources.
# This process uses the LdmlConverter in $TOOLS_ROOT/cldr/cldr-to-icu/;
# see $TOOLS_ROOT/cldr/cldr-to-icu/README.txt
@ -215,58 +187,59 @@ ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt
# build-icu-data.xml file, such as adding new locales etc.
cd $TOOLS_ROOT/cldr/cldr-to-icu
ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee $NOTES/cldr-newData-builddataLog.txt
ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee /tmp/cldr-newData-builddataLog.txt
# 5c. Update the CLDR testData files needed by ICU4C and ICU4J tests, ensuring
# 4c. Update the CLDR testData files needed by ICU4C and ICU4J tests, ensuring
# they're representative of the newest CLDR data.
cd $TOOLS_ROOT/cldr
ant copy-cldr-testdata
# 5d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
# into icu4c/source/test/testdata/localeCanonicalization.txt
# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
# and add the following line to the beginning of these two files
# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
# 5e. For the time being, manually re-add the lstm entries in data/brkitr/root.txt
# 4e. For the time being, manually re-add the lstm entries in data/brkitr/root.txt
open $ICU4C_DIR/source/data/brkitr/root.txt
# paste the following block after the dictionaries block and before the final closing '}':
# paste the following block at the end, after the dictionaries block:
lstm{
Thai{"Thai_graphclust_model4_heavy.res"}
Mymr{"Burmese_graphclust_model5_heavy.res"}
}
# 6. Check which data files have modifications, which have been added or removed
# 5. Check which data files have modifications, which have been added or removed
# (if there are no changes, you may not need to proceed further). Make sure the
# list seems reasonable.
cd $ICU4C_DIR/..
cd $ICU4C_DIR/source/data
git status
# 6a. You may also want to check which files were modified in CLDR production data:
# 5a. You may also want to check which files were modified in CLDR production data:
cd $CLDR_TMP_DIR
git status
# 7. Fix any errors, investigate any warnings.
# 6. Fix any errors, investigate any warnings.
#
# Fixing may entail modifying CLDR source data or TOOLS_ROOT config files or
# tooling.
# 8. Now rebuild ICU4C with the new data and run make check tests.
# 7. Now rebuild ICU4C with the new data and run make check tests.
# Again, keep a log so you can investigate the errors.
cd $ICU4C_DIR/source
# 8a. If any files were added or removed (likely), re-run configure:
./runConfigureICU [--enable-debug] <platform>
# 7a. If any files were added or removed (likely), re-run configure:
./runConfigureICU <platform>
make clean
# 8b. Now do the rebuild.
make check 2>&1 | tee $NOTES/icu4c-newData-makeCheck.txt
# 7b. Now do the rebuild.
make check 2>&1 | tee /tmp/icu4c-newData-makeCheck.txt
# 9. Investigate each test case failure. The first run processing new CLDR data
# 8. Investigate each test case failure. The first run processing new CLDR data
# from the Survey Tool can result in thousands of failures (in many cases, one
# CLDR data fix can resolve hundreds of test failures). If the error is caused
# by bad CLDR data, then file a CLDR bug, fix the data, and regenerate from
@ -276,9 +249,9 @@ make check 2>&1 | tee $NOTES/icu4c-newData-makeCheck.txt
# Note that if the new data has any differences in structure, you will have to
# update test/testdata/structLocale.txt or /tsutil/cldrtest/TestLocaleStructure
# may fail.
# Repeat steps 4-8 until there are no errors.
# Repeat steps 4-7 until there are no errors.
# 10. You can also run the make check tests in exhaustive mode. As an alternative
# 9. You can also run the make check tests in exhaustive mode. As an alternative
# you can run them as part of the pre-merge tests by adding the following as a
# comment in the pull request: "/azp run CI-Exhaustive". You should do one or the
# other; the exhaustive tests are *not* run automatically on each pull request,
@ -287,10 +260,17 @@ make check 2>&1 | tee $NOTES/icu4c-newData-makeCheck.txt
cd $ICU4C_DIR/source
export INTLTEST_OPTS="-e"
export CINTLTST_OPTS="-e"
make check 2>&1 | tee $NOTES/icu4c-newData-makeCheckEx.txt
make check 2>&1 | tee /tmp/icu4c-newData-makeCheckEx.txt
# 11. Again, investigate each failure, fixing CLDR data or ICU test cases as
# appropriate, and repeating steps 4-8 and 10 until there are no errors.
# 10. Again, investigate each failure, fixing CLDR data or ICU test cases as
# appropriate, and repeating steps 4-7 and 9 until there are no errors.
# 11. Now with ICU4J, build and test without new data first, to verify that
# there are no pre-existing errors (or at least to have the pre-existing errors
# as a base for comparison):
cd $ICU4J_ROOT
ant check 2>&1 | tee /tmp/icu4j-oldData-antCheck.txt
# 12. Transfer the data to ICU4J:
cd $ICU4C_DIR/source
@ -311,7 +291,7 @@ make icu4j-data-install
# Keep a log so you can investigate the errors.
cd $ICU4J_ROOT
ant check 2>&1 | tee $NOTES/icu4j-newData-antCheck.txt
ant check 2>&1 | tee /tmp/icu4j-newData-antCheck.txt
# 14. Investigate test case failures; fix test cases and repeat from step 12,
# or fix CLDR data and repeat from step 4, as appropriate, until there are no

Просмотреть файл

@ -3,7 +3,7 @@
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
{
"cldrVersion": "41",
"cldrVersion": "40",
"aliases": {
"ars": "ar_SA",
"in": "id",

Просмотреть файл

@ -5,7 +5,7 @@ af{
collations{
standard{
Sequence{"&N<<<ʼn"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ am{
collations{
standard{
Sequence{"[reorder Ethi]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ ar{
"&ت<<ة<<<ﺔ<<<ﺓ"
"&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -397,7 +397,7 @@ ar{
"&ۓ‎=ﮰ‎=ﮱ"
"&ۀ‎=ﮤ‎=ﮥ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -11,7 +11,7 @@ as{
"&[before 1]ত<ৎ=ত্\u200D"
"&হ<ক্ষ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ az{
"[import az-u-co-standard]"
"[reorder others]"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -26,7 +26,7 @@ az{
"&H<x<<<X"
"&Z<w<<<W"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ be{
"&Е<ё<<<Ё"
"&у<ў<<<Ў"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ bg{
collations{
standard{
Sequence{"[reorder Cyrl]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ bn{
"[reorder Beng Deva Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
"&ঔ<ং<ঃ<ঁ"
}
Version{"41"}
Version{"40"}
}
traditional{
Sequence{
@ -629,7 +629,7 @@ bn{
"&যৌ<<<য়ৌ"
"&য্<<<য়্"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -65,7 +65,7 @@ bo{
"&ྲཱྀ=ཷ"
"&ླཱྀ=ཹ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ br{
"&C<ch<<<Ch<<<CH<c''h=c\u02BCh=c\u2019h<<<C''h=C\u02BCh=C\u2019h<<<C'"
"'H=C\u02BCH=C\u2019H"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,11 +5,11 @@ bs{
collations{
search{
Sequence{"[import hr-u-co-search]"}
Version{"41"}
Version{"40"}
}
standard{
Sequence{"[import hr]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ bs_Cyrl{
collations{
standard{
Sequence{"[import sr]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ ca{
"[import und-u-co-search]"
"&L<ŀ=l·<<<Ŀ=L·"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ ceb{
collations{
standard{
Sequence{"&N<ñ<<<Ñ<ng<<<Ng<<<NG"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ chr{
collations{
standard{
Sequence{"[reorder Cher]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -11,7 +11,7 @@ cs{
"&S<š<<<Š"
"&Z<ž<<<Ž"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -14,7 +14,7 @@ cy{
"&R<rh<<<Rh<<<RH"
"&T<th<<<Th<<<TH"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ da{
"[import da-u-co-standard]"
"[caseFirst off]"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -21,7 +21,7 @@ da{
"&[before 1]ǀ<æ<<<Æ<<ä<<<Ä<ø<<<Ø<<ö<<<Ö<<ő<<<Ő<å<<<Å<<<aa<<<Aa<<<AA"
"&oe<<œ<<<Œ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,14 +9,14 @@ de{
"&OE<<ö<<<Ö"
"&UE<<ü<<<Ü"
}
Version{"41"}
Version{"40"}
}
search{
Sequence{
"[import und-u-co-search]"
"[import de-u-co-phonebk]"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -10,7 +10,7 @@ de_AT{
"&u<ü<<<Ü"
"&ss<ß<<<ẞ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -14,7 +14,7 @@ dsb{
"&S<š<<<Š<ś<<<Ś"
"&Z<ž<<<Ž<ź<<<Ź"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -17,7 +17,7 @@ ee{
"&T<ts<<<Ts<<<TS"
"&V<ʋ<<<Ʋ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ el{
"[normalization on]"
"[reorder Grek]"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ en_US_POSIX{
"&A<*'\u0020'-'/'<*0-'@'<*ABCDEFGHIJKLMNOPQRSTUVWXYZ<*'['-'`'<*abcdefghijklmnopqrstuvwxyz"
"<*'{'-'\u007F'"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -12,7 +12,7 @@ eo{
"&S<ŝ<<<Ŝ"
"&U<ŭ<<<Ŭ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,11 +8,11 @@ es{
"[import und-u-co-search]"
"&N<ñ<<<Ñ"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{"&N<ñ<<<Ñ"}
Version{"41"}
Version{"40"}
}
traditional{
Sequence{
@ -20,7 +20,7 @@ es{
"&C<ch<<<Ch<<<CH"
"&l<ll<<<Ll<<<LL"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ et{
"&[before 1]T<š<<<Š<z<<<Z<ž<<<Ž"
"&[before 1]X<õ<<<Õ<ä<<<Ä<ö<<<Ö<ü<<<Ü"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -16,7 +16,7 @@ fa{
"&ۏ<ه<<ە<<ہ<<ة<<ۃ<<ۀ<<ھ"
"&ی<<*ىےيېۑۍێ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ fa_AF{
collations{
standard{
Sequence{"[import ps]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -143,7 +143,7 @@ ff_Adlm{
"&𞤵<𞤵𞥅"
"&𞤵𞥅<<𞤵𞤵"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ fi{
"[import und-u-co-search]"
"[import fi-u-co-trad]"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -20,7 +20,7 @@ fi{
"&Z\u0335<<ʒ<<<Ʒ"
"&[before 1]ǀ<å<<<Å<ä<<<Ä<<æ<<<Æ<ö<<<Ö<<ø<<<Ø"
}
Version{"41"}
Version{"40"}
}
traditional{
Sequence{
@ -31,7 +31,7 @@ fi{
"&Y<<ü<<<Ü<<ű<<<Ű"
"&[before 1]ǀ<å<<<Å<ä<<<Ä<<æ<<<Æ<ö<<<Ö<<ø<<<Ø<<ő<<<Ő<<õ<<<Õ<<œ<<<Œ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ fil{
collations{
standard{
Sequence{"&N<ñ<<<Ñ<ng<<<Ng<<<NG"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ fo{
"[import und-u-co-search]"
"[import fo-u-co-standard]"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -18,7 +18,7 @@ fo{
"&Y<<ü<<<Ü<<ű<<<Ű"
"&[before 1]ǀ<æ<<<Æ<<ä<<<Ä<<ę<<<Ę<ø<<<Ø<<ö<<<Ö<<ő<<<Ő<<œ<<<Œ<å<<<Å<<<aa<<<Aa<<<AA"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,7 +5,7 @@ fr_CA{
collations{
standard{
Sequence{"[backwards 2]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -5,11 +5,11 @@ gl{
collations{
search{
Sequence{"[import es-u-co-search]"}
Version{"41"}
Version{"40"}
}
standard{
Sequence{"[import es]"}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -9,7 +9,7 @@ gu{
"[reorder Gujr Deva Beng Guru Orya Taml Telu Knda Mlym Sinh]"
"&ૐ<ં<<ઁ<"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -12,7 +12,7 @@ ha{
"&T<ts<<<Ts<<<TS"
"&Y<ƴ<<<ʼy<<<''y<<<Ƴ<<<ʼY<<<''Y"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -8,7 +8,7 @@ haw{
"&a<e<<<E<i<<<I<o<<<O<u<<<U"
"&w<ʻ"
}
Version{"41"}
Version{"40"}
}
}
}

Просмотреть файл

@ -11,7 +11,7 @@ he{
"&״"
"<<'\u0022'"
}
Version{"41"}
Version{"40"}
}
standard{
Sequence{
@ -20,7 +20,7 @@ he{
"&[before 2]''<<׳"
"&[before 2]'\u0022'<<״"
}
Version{"41"}
Version{"40"}
}
}
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше