зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1421938 - Implement a mozilla::intl::QuotesForLang utility to return localized quotation marks for a given locale, based on CLDR data. r=emilio
Differential Revision: https://phabricator.services.mozilla.com/D36428 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
15ab13907f
Коммит
54986d1806
|
@ -0,0 +1,78 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode:nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "Quotes.h"
|
||||
#include "MozLocale.h"
|
||||
#include "mozilla/ClearOnShutdown.h"
|
||||
#include "mozilla/StaticPtr.h"
|
||||
#include "nsDataHashtable.h"
|
||||
#include "nsPrintfCString.h"
|
||||
|
||||
using namespace mozilla;
|
||||
using namespace mozilla::intl;
|
||||
|
||||
namespace {

// One record of the CLDR-derived table that cldr-quotes.inc defines below.
struct LangQuotesRec {
  // The locale codes that share mQuotes, packed back to back in one literal:
  // every code is NUL-terminated, and an empty string ends the list.
  const char* mLangs;

  // The quotation marks used by each of the locales named in mLangs.
  Quotes mQuotes;
};

// Provides sLangQuotes[], the array of LangQuotesRec records.
#include "cldr-quotes.inc"

// Maps a locale code to its quotation marks. QuotesForLang() creates and
// populates the table on first use and registers it with ClearOnShutdown.
StaticAutoPtr<nsDataHashtable<nsCStringHashKey, Quotes>> sQuotesForLang;

}  // anonymous namespace
|
||||
|
||||
namespace mozilla {
|
||||
namespace intl {
|
||||
|
||||
const Quotes* QuotesForLang(const nsAtom* aLang) {
  MOZ_ASSERT(NS_IsMainThread());

  // The lookup table is built lazily from the CLDR-derived sLangQuotes array
  // the first time quotes are requested, and is registered for release at
  // shutdown.
  if (!sQuotesForLang) {
    sQuotesForLang = new nsDataHashtable<nsCStringHashKey, Quotes>(32);
    ClearOnShutdown(&sQuotesForLang);
    for (const LangQuotesRec& rec : sLangQuotes) {
      // rec.mLangs packs several NUL-terminated locale codes back to back;
      // a zero-length code marks the end of the list.
      const char* code = rec.mLangs;
      for (size_t codeLen = strlen(code); codeLen != 0;
           codeLen = strlen(code)) {
        sQuotesForLang->Put(nsDependentCString(code, codeLen), rec.mQuotes);
        code += codeLen + 1;
      }
    }
  }

  nsAtomCString langStr(aLang);
  if (const Quotes* exact = sQuotesForLang->GetValue(langStr)) {
    // The requested lang is itself a key in the table.
    return exact;
  }

  // No exact match: parse lang as a Locale (which also canonicalizes the case
  // of the subtags) and retry with progressively less specific keys.
  Locale loc(langStr);
  if (!loc.IsWellFormed()) {
    return nullptr;
  }

  // Looks up "<language>-<subtag>"; an empty subtag yields nullptr without
  // consulting the table.
  const auto lookupWithSubtag = [&loc](const auto& aSubtag) -> const Quotes* {
    if (aSubtag.IsEmpty()) {
      return nullptr;
    }
    return sQuotesForLang->GetValue(
        nsPrintfCString("%s-%s", loc.GetLanguage().get(), aSubtag.get()));
  };

  // Order of preference: language+region, then language+script.
  if (const Quotes* byRegion = lookupWithSubtag(loc.GetRegion())) {
    return byRegion;
  }
  if (const Quotes* byScript = lookupWithSubtag(loc.GetScript())) {
    return byScript;
  }

  // Last resort is the bare primary language tag; a null result from the
  // table is passed straight back to the caller.
  return sQuotesForLang->GetValue(loc.GetLanguage());
}
|
||||
|
||||
} // namespace intl
|
||||
} // namespace mozilla
|
|
@ -0,0 +1,35 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode:nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef mozilla_intl_Quotes_h__
|
||||
#define mozilla_intl_Quotes_h__
|
||||
|
||||
#include "nsAtom.h"
|
||||
|
||||
namespace mozilla {
|
||||
namespace intl {
|
||||
|
||||
// Currently, all the quotation characters provided by CLDR are single BMP
|
||||
// codepoints, so they fit into char16_t fields. If there are ever multi-
|
||||
// character strings or non-BMP codepoints in a future version, we'll need
|
||||
// to extend this to a larger/more flexible structure, but for now it's
|
||||
// deliberately kept simple and lightweight.
|
||||
struct Quotes {
  // The four quotation marks of a locale, one UTF-16 code unit each:
  //   mChars[0] open, mChars[1] close,
  //   mChars[2] alternativeOpen, mChars[3] alternativeClose.
  char16_t mChars[4];
};
|
||||
|
||||
/**
 * Return a pointer to the Quotes record for the given locale (lang attribute),
 * or nullptr if none is available.
 * Must be called on the main thread.
 * The returned value points to a hashtable entry, but will remain valid until
 * shutdown begins, as the table is not modified after initialization.
 */
|
||||
const Quotes* QuotesForLang(const nsAtom* aLang);
|
||||
|
||||
} // namespace intl
|
||||
} // namespace mozilla
|
||||
|
||||
#endif // mozilla_intl_Quotes_h__
|
|
@ -0,0 +1,45 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/*
|
||||
* Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
|
||||
*
|
||||
* For terms of use, see http://www.unicode.org/copyright.html.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Created on Fri Jul 5 20:02:29 2019 from CLDR data file cldr-common-35.1.zip.
|
||||
*
|
||||
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
|
||||
*
|
||||
* (generated by intl/locale/cldr-quotes.pl)
|
||||
*/
|
||||
|
||||
// Each record pairs a list of locale codes with the quotation marks they share.
// The codes are NUL-terminated and packed into one string literal; the literal's
// own terminator after the final \0 forms the empty string that ends the list.
// The four values are in the order [open, close, alternativeOpen,
// alternativeClose].
static const LangQuotesRec sLangQuotes[] = {
|
||||
// clang-format off
|
||||
{ "af\0ak\0as\0asa\0az\0bem\0bez\0bn\0brx\0ccp\0ceb\0cgg\0chr\0cy\0da\0dav\0dje\0dz\0ebu\0ee\0en\0fil\0fo\0ga\0gd\0gl\0gu\0guz\0ha\0id\0ig\0jmc\0jv\0kam\0kde\0kea\0khq\0ki\0kln\0km\0kn\0kok\0ksb\0ku\0lg\0ln\0lo\0lrc\0lu\0luo\0lv\0mas\0mer\0mfe\0mgo\0mi\0ml\0mn\0mr\0ms\0mt\0my\0naq\0nd\0ne\0nus\0nyn\0or\0ps\0rof\0rwk\0saq\0sbp\0sd\0seh\0ses\0si\0so\0sw\0te\0teo\0th\0to\0tr\0tt\0twq\0tzm\0uz-Cyrl\0vai\0vun\0wo\0xog\0yo\0yue-Hans\0zh\0zu\0", { 0x201c, 0x201d, 0x2018, 0x2019 } },
|
||||
{ "agq\0ff\0", { 0x201e, 0x201d, 0x201a, 0x2019 } },
|
||||
{ "am\0az-Cyrl\0fa\0fr-CH\0gsw\0jgo\0kkj\0mzn\0", { 0xab, 0xbb, 0x2039, 0x203a } },
|
||||
{ "ar\0ur\0", { 0x201d, 0x201c, 0x2019, 0x2018 } },
|
||||
{ "ast\0bm\0ca\0dyo\0el\0es\0ewo\0it\0kab\0kk\0mg\0mua\0nnh\0pt-PT\0sg\0sq\0", { 0xab, 0xbb, 0x201c, 0x201d } },
|
||||
{ "bas\0be\0ky\0ru\0sah\0uk\0", { 0xab, 0xbb, 0x201e, 0x201c } },
|
||||
{ "bg\0lt\0", { 0x201e, 0x201c, 0x201e, 0x201c } },
|
||||
{ "br\0", { 0x201c, 0x201d, 0xab, 0xbb } },
|
||||
{ "bs-Cyrl\0cs\0de\0dsb\0et\0hr\0hsb\0is\0lb\0luy\0mk\0sk\0sl\0", { 0x201e, 0x201c, 0x201a, 0x2018 } },
|
||||
{ "bs\0", { 0x201e, 0x201d, 0x2018, 0x2019 } },
|
||||
{ "dua\0ksf\0nb\0nn\0rw\0", { 0xab, 0xbb, 0x2018, 0x2019 } },
|
||||
{ "es-419\0eu\0tk\0", { 0x201c, 0x201d, 0x201c, 0x201d } },
|
||||
{ "fi\0he\0lag\0rn\0sn\0sv\0", { 0x201d, 0x201d, 0x2019, 0x2019 } },
|
||||
{ "fr-CA\0", { 0xab, 0xbb, 0x201d, 0x201c } },
|
||||
{ "fr\0hy\0tg\0yav\0", { 0xab, 0xbb, 0xab, 0xbb } },
|
||||
{ "hu\0", { 0x201e, 0x201d, 0xbb, 0xab } },
|
||||
{ "ia\0nl\0ti-ER\0xh\0", { 0x2018, 0x2019, 0x201c, 0x201d } },
|
||||
{ "ja\0yue\0zh-Hant\0", { 0x300c, 0x300d, 0x300e, 0x300f } },
|
||||
{ "ka\0", { 0x201e, 0x201c, 0xab, 0xbb } },
|
||||
{ "nmg\0pl\0ro\0", { 0x201e, 0x201d, 0xab, 0xbb } },
|
||||
{ "shi\0zgh\0", { 0xab, 0xbb, 0x201e, 0x201d } },
|
||||
{ "sr\0", { 0x201e, 0x201c, 0x2018, 0x2018 } },
|
||||
{ "uz\0", { 0x201c, 0x201d, 0x2019, 0x2018 } },
|
||||
// clang-format on
|
||||
};
|
|
@ -0,0 +1,104 @@
|
|||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# Tool to generate the cldr-quotes.inc file, to be #include'd in Quotes.cpp
|
||||
# to provide locale-appropriate opening and closing quote marks.
|
||||
|
||||
# To regenerate cldr-quotes.inc for a new CLDR release, download the data file
|
||||
# "cldr-common-##.zip" from http://unicode.org/Public/cldr/latest into the
|
||||
# current directory, update the $filename variable below accordingly, run
|
||||
#
|
||||
# perl cldr-quotes.pl > cldr-quotes.inc
|
||||
#
|
||||
# then use `hg diff` to check that the result looks sane.
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
|
||||
use Encode;
|
||||
use IO::Uncompress::Unzip;
|
||||
|
||||
# Name of the CLDR "common" archive to read from the current directory;
# update this for each new CLDR release.
my $filename = 'cldr-common-35.1.zip';

# %langQuotes maps a locale code (e.g. "fr-CA") to an array holding its four
# quote strings as raw UTF-8 bytes, in the order [quotationStart, quotationEnd,
# alternateQuotationStart, alternateQuotationEnd]. Slots that the locale's own
# file does not specify stay undefined at this stage.
# %quoteLangs is filled in further down: it maps each distinct set of quotes
# to the list of locales that use it.
my (%langQuotes, %quoteLangs);

my $zip = IO::Uncompress::Unzip->new($filename) ||
    die "unzip failed: $IO::Uncompress::Unzip::UnzipError\n";

# Walk every member of the archive; nextStream() returns 1 while another
# member follows, 0 at the end of the archive and -1 on error.
my $status = 1;
while ($status > 0) {
    my $name = $zip->getHeaderInfo()->{Name};
    if ($name =~ m@common/main/([A-Za-z0-9_]+)\.xml@) {
        my $lang = $1;
        # CLDR file names join subtags with "_". Convert every separator, not
        # just the first, so that "xx_Yyyy_ZZ" becomes "xx-Yyyy-ZZ" and its
        # parent chain ("xx-Yyyy", then "xx") can be derived from it.
        $lang =~ tr/_/-/;
        while (<$zip>) {
            $langQuotes{$lang}[0] = $1 if (m!<quotationStart>(.+)<!);
            $langQuotes{$lang}[1] = $1 if (m!<quotationEnd>(.+)<!);
            $langQuotes{$lang}[2] = $1 if (m!<alternateQuotationStart>(.+)<!);
            $langQuotes{$lang}[3] = $1 if (m!<alternateQuotationEnd>(.+)<!);
        }
    }
    $status = $zip->nextStream();
}

# Do not generate a table from a partially-read archive.
die "error reading $filename: $IO::Uncompress::Unzip::UnzipError\n"
    if $status < 0;
$zip->close;
|
||||
|
||||
# Resolve the full set of four quotes for every locale and group the locales
# by the quotes they use. Sorted order guarantees that a locale's ancestors
# (which are prefixes of its code) have been resolved before the locale itself.
foreach my $lang (sort keys %langQuotes) {
    # We don't actually want to emit anything for the root locale
    next if $lang eq "root";

    # Collect the ancestors we have data for, nearest first, by repeatedly
    # stripping the trailing subtag ("xx-Yyyy-ZZ" -> "xx-Yyyy" -> "xx").
    # Testing with `exists` keeps us from autovivifying empty entries in
    # %langQuotes for ancestors that have no data of their own.
    my @ancestors;
    my $code = $lang;
    while ($code =~ s/-[^-]+$//) {
        push @ancestors, $code if exists $langQuotes{$code};
    }

    # Inherit any missing entries, first from the ancestors (nearest first)
    # and finally from the root locale
    foreach my $source (@ancestors, "root") {
        next unless exists $langQuotes{$source};
        for my $i (0 .. 3) {
            $langQuotes{$lang}[$i] = $langQuotes{$source}[$i] unless $langQuotes{$lang}[$i];
        }
    }

    # Without this check a missing value would be silently emitted as 0x0
    die "incomplete quote data for locale '$lang'\n"
        if grep { !defined $_ } @{$langQuotes{$lang}}[0 .. 3];

    # If the locale ends up the same as its nearest known parent, skip it;
    # top-level languages are always kept
    next if @ancestors &&
        (join(",", @{$langQuotes{$lang}}) eq join(",", @{$langQuotes{$ancestors[0]}}));

    # Create a string with the C source form for the array of 4 quote characters
    my $quoteChars = join(", ", map { sprintf("0x%x", ord Encode::decode("UTF-8", $_)) } @{$langQuotes{$lang}});

    # Record this locale in the list of those which use this particular set of
    # quotes. The explicit @{...} dereference is required: pushing onto a bare
    # scalar reference is a fatal error in Perl 5.24 and later. It also
    # autovivifies the list on first use.
    push @{$quoteLangs{$quoteChars}}, $lang;
}
|
||||
|
||||
# Output each unique list of quotes, with the string of associated locales
|
||||
# In scalar context gmtime() returns a readable UTC date string; it is
# interpolated into the generated header below.
my $timestamp = gmtime();
|
||||
# Emit the license and provenance header of the generated file. The heredoc
# interpolates $timestamp and $filename.
print <<__EOT__;
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/*
|
||||
* Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
|
||||
*
|
||||
* For terms of use, see http://www.unicode.org/copyright.html.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Created on $timestamp from CLDR data file $filename.
|
||||
*
|
||||
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
|
||||
*
|
||||
* (generated by intl/locale/cldr-quotes.pl)
|
||||
*/
|
||||
|
||||
__EOT__
|
||||
|
||||
# Emit the array itself, bracketed by clang-format off/on markers.
print "static const LangQuotesRec sLangQuotes[] = {\n";
|
||||
print " // clang-format off\n";
|
||||
# One initializer per distinct set of quotes: the locales using that set are
# sorted and written as "\0"-separated codes with a trailing "\0", followed by
# the four quote values. The finished lines are then sorted as text.
print sort map { sprintf(" { \"%s\\0\", { %s } },\n", join("\\0", sort @{$quoteLangs{$_}}), $_) } (keys %quoteLangs);
|
||||
print " // clang-format on\n";
|
||||
print "};\n";
|
|
@ -36,6 +36,7 @@ EXPORTS.mozilla.intl += [
|
|||
'LocaleService.h',
|
||||
'MozLocale.h',
|
||||
'OSPreferences.h',
|
||||
'Quotes.h',
|
||||
]
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
|
@ -47,6 +48,7 @@ UNIFIED_SOURCES += [
|
|||
'nsLanguageAtomService.cpp',
|
||||
'nsUConvPropertySearch.cpp',
|
||||
'OSPreferences.cpp',
|
||||
'Quotes.cpp',
|
||||
]
|
||||
|
||||
EXTRA_JS_MODULES += [
|
||||
|
|
Загрузка…
Ссылка в новой задаче