From 2b957702c7e046b6229eb395ae0bb8a7c53cab47 Mon Sep 17 00:00:00 2001 From: Jonathan Kew Date: Mon, 20 Feb 2012 20:39:59 +0000 Subject: [PATCH] bug 728866 - pt 1 - expose low-level APIs for canonical composition and decomposition of a single Unicode character. r=smontagu --HG-- rename : intl/unicharutil/src/nsUnicodeNormalizer.h => intl/unicharutil/public/nsUnicodeNormalizer.h --- intl/unicharutil/public/Makefile.in | 1 + .../{src => public}/nsUnicodeNormalizer.h | 4 +++ intl/unicharutil/src/nsUnicodeNormalizer.cpp | 27 +++++++++++++++++++ 3 files changed, 32 insertions(+) rename intl/unicharutil/{src => public}/nsUnicodeNormalizer.h (91%) diff --git a/intl/unicharutil/public/Makefile.in b/intl/unicharutil/public/Makefile.in index 9bf08cd718c9..6ebd062b3952 100644 --- a/intl/unicharutil/public/Makefile.in +++ b/intl/unicharutil/public/Makefile.in @@ -48,6 +48,7 @@ EXPORTS = \ nsICaseConversion.h \ nsIUGenCategory.h \ nsUnicharUtilCIID.h \ + nsUnicodeNormalizer.h \ $(NULL) include $(topsrcdir)/config/rules.mk diff --git a/intl/unicharutil/src/nsUnicodeNormalizer.h b/intl/unicharutil/public/nsUnicodeNormalizer.h similarity index 91% rename from intl/unicharutil/src/nsUnicodeNormalizer.h rename to intl/unicharutil/public/nsUnicodeNormalizer.h index 242f26e8e93f..6d500edcfb85 100644 --- a/intl/unicharutil/src/nsUnicodeNormalizer.h +++ b/intl/unicharutil/public/nsUnicodeNormalizer.h @@ -57,6 +57,10 @@ public: NS_IMETHOD NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest); NS_IMETHOD NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest); NS_IMETHOD NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest); + + // low-level access to the composition data needed for HarfBuzz callbacks + static bool Compose(PRUint32 a, PRUint32 b, PRUint32 *ab); + static bool DecomposeNonRecursively(PRUint32 comp, PRUint32 *c1, PRUint32 *c2); }; #endif //nsUnicodeNormalizer_h__ diff --git a/intl/unicharutil/src/nsUnicodeNormalizer.cpp 
b/intl/unicharutil/src/nsUnicodeNormalizer.cpp
index 97da73e9a5e4..c63339f63d26 100644
--- a/intl/unicharutil/src/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/src/nsUnicodeNormalizer.cpp
@@ -713,3 +713,30 @@ nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDe
   return mdn_normalize(true, true, aSrc, aDest);
 }
 
+bool
+nsUnicodeNormalizer::Compose(PRUint32 a, PRUint32 b, PRUint32 *ab)
+{
+  return mdn__unicode_compose(a, b, ab) == NS_OK; // true iff the status is NS_OK; |ab| is forwarded as-is
+}
+
+bool
+nsUnicodeNormalizer::DecomposeNonRecursively(PRUint32 c, PRUint32 *c1, PRUint32 *c2)
+{
+  // Single-step decomposition of |c| into |*c1| and |*c2|. Returns false, with
+  // both outputs left unwritten, when decompose_char returns 0 or a value with
+  // DECOMP_COMPAT set. We can't use mdn__unicode_decompose here: it recurses
+  // and may yield more than two characters, but the harfbuzz callback wants at
+  // most two. So we do a low-level lookup in the table of decomp sequences.
+  const PRUint32 *seq; // filled in by decompose_char below
+  PRUint32 seqidx = decompose_char(c, &seq);
+  if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
+    return false;
+  }
+  *c1 = *seq & ~END_BIT; // mask off the END_BIT flag to get the bare value
+  if (*seq & END_BIT) { // first entry is flagged as the last one
+    *c2 = 0; // no second character: report 0
+  } else {
+    *c2 = *++seq & ~END_BIT; // step to the second entry, flag masked off
+  }
+  return true;
+}