/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* Mirror of https://github.com/mozilla/gecko-dev.git (273 lines, 11 KiB, C). */
|
|
|
|
#ifndef nsBidiUtils_h__
|
|
#define nsBidiUtils_h__
|
|
|
|
#include "nsStringGlue.h"
|
|
|
|
/**
 * Bidirectional character types.
 *
 * Read ftp://ftp.unicode.org/Public/UNIDATA/ReadMe-Latest.txt
 * section BIDIRECTIONAL PROPERTIES
 * for the detailed definition of the following categories.
 *
 * The values here must match the equivalents in %bidicategorycode in
 * mozilla/intl/unicharutil/tools/genUnicodePropertyData.pl,
 * and must also match the values used by ICU's UCharDirection.
 */
enum nsCharType {
  eCharType_LeftToRight              = 0,
  eCharType_RightToLeft              = 1,
  eCharType_EuropeanNumber           = 2,
  eCharType_EuropeanNumberSeparator  = 3,
  eCharType_EuropeanNumberTerminator = 4,
  eCharType_ArabicNumber             = 5,
  eCharType_CommonNumberSeparator    = 6,
  eCharType_BlockSeparator           = 7,
  eCharType_SegmentSeparator         = 8,
  eCharType_WhiteSpaceNeutral        = 9,
  eCharType_OtherNeutral             = 10,
  eCharType_LeftToRightEmbedding     = 11,
  eCharType_LeftToRightOverride      = 12,
  eCharType_RightToLeftArabic        = 13,
  eCharType_RightToLeftEmbedding     = 14,
  eCharType_RightToLeftOverride      = 15,
  eCharType_PopDirectionalFormat     = 16,
  eCharType_DirNonSpacingMark        = 17,
  eCharType_BoundaryNeutral          = 18,
  eCharType_FirstStrongIsolate       = 19,
  eCharType_LeftToRightIsolate       = 20,
  eCharType_RightToLeftIsolate       = 21,
  eCharType_PopDirectionalIsolate    = 22,
  /* One past the last real type; must stay the final enumerator. */
  eCharType_CharTypeCount
};

/**
 * This specifies the language directional property of a character set.
 */
typedef enum nsCharType nsCharType;
|
|
|
|
/**
 * Find the direction of an embedding level or paragraph level set by
 * the Unicode Bidi Algorithm. (Even levels are left-to-right, odd
 * levels right-to-left.)
 *
 * Evaluates to true when the lowest bit of |level| is set.
 */
#define IS_LEVEL_RTL(level) (((level) & 1) == 1)
|
|
|
|
/**
 * Check whether two bidi levels have the same parity and thus the same
 * directionality.
 *
 * Both arguments are parenthesized inside the expansion so that compound
 * expressions (e.g. `a | b`) are evaluated before the XOR.
 */
#define IS_SAME_DIRECTION(level1, level2) ((((level1) ^ (level2)) & 1) == 0)
|
|
|
|
/**
 * Convert from nsBidiLevel to nsBidiDirection: yields NSBIDI_RTL when
 * IS_LEVEL_RTL(level) holds and NSBIDI_LTR otherwise.
 */
#define DIRECTION_FROM_LEVEL(level) \
  ((IS_LEVEL_RTL(level)) ? NSBIDI_RTL : NSBIDI_LTR)
|
|
|
|
/**
 * Definitions of bidirectional character types by category.
 */

/* True for the two right-to-left types: RightToLeft and RightToLeftArabic. */
#define CHARTYPE_IS_RTL(val) ( ( (val) == eCharType_RightToLeft) || \
                               ( (val) == eCharType_RightToLeftArabic) )
|
|
|
|
/*
 * True for EuropeanNumberSeparator, EuropeanNumberTerminator, and every
 * type whose value is greater than eCharType_ArabicNumber except
 * eCharType_RightToLeftArabic.
 */
#define CHARTYPE_IS_WEAK(val) ( ( (val) == eCharType_EuropeanNumberSeparator)  \
                             || ( (val) == eCharType_EuropeanNumberTerminator) \
                             || ( ( (val) > eCharType_ArabicNumber) &&         \
                                  ( (val) != eCharType_RightToLeftArabic) ) )
|
|
|
|
/**
 * Inspects a Unichar, converting numbers to Arabic or Hindi forms and
 * returning them.
 * @param aChar is the character
 * @param aPrevCharArabic is true if the previous character in the string is
 *        an Arabic char
 * @param aNumFlag specifies the conversion to perform:
 *        IBMBIDI_NUMERAL_NOMINAL:      don't do any conversion
 *        IBMBIDI_NUMERAL_HINDI:        convert to Hindi forms
 *                                      (Unicode 0660-0669)
 *        IBMBIDI_NUMERAL_ARABIC:       convert to Arabic forms
 *                                      (Unicode 0030-0039)
 *        IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to
 *                                      Hindi, otherwise to Arabic
 * @return the converted Unichar
 */
char16_t HandleNumberInChar(char16_t aChar, bool aPrevCharArabic, uint32_t aNumFlag);
|
|
|
|
/**
|
|
* Scan a Unichar string, converting numbers to Arabic or Hindi forms in place
|
|
* @param aBuffer is the string
|
|
* @param aSize is the size of aBuffer
|
|
* @param aNumFlag specifies the conversion to perform:
|
|
* IBMBIDI_NUMERAL_NOMINAL: don't do any conversion
|
|
* IBMBIDI_NUMERAL_HINDI: convert to Hindi forms (Unicode 0660-0669)
|
|
* IBMBIDI_NUMERAL_ARABIC: convert to Arabic forms (Unicode 0030-0039)
|
|
* IBMBIDI_NUMERAL_HINDICONTEXT: convert numbers in Arabic text to Hindi, otherwise to Arabic
|
|
*/
|
|
nsresult HandleNumbers(char16_t* aBuffer, uint32_t aSize, uint32_t aNumFlag);
|
|
|
|
/* Codepoints of the Bidi control characters. */
#define LRM_CHAR 0x200e
#define RLM_CHAR 0x200f

#define LRE_CHAR 0x202a
#define RLE_CHAR 0x202b
#define PDF_CHAR 0x202c
#define LRO_CHAR 0x202d
#define RLO_CHAR 0x202e

#define LRI_CHAR 0x2066
#define RLI_CHAR 0x2067
#define FSI_CHAR 0x2068
#define PDI_CHAR 0x2069

#define ALM_CHAR 0x061C

/**
 * Given a UTF-32 codepoint,
 * return true if the codepoint is a Bidi control character (LRM, RLM, ALM;
 * LRE, RLE, PDF, LRO, RLO; LRI, RLI, FSI, PDI).
 * Return false otherwise.
 */
inline bool IsBidiControl(uint32_t aChar) {
  return ((LRE_CHAR <= aChar && aChar <= RLO_CHAR) ||
          (LRI_CHAR <= aChar && aChar <= PDI_CHAR) ||
          (aChar == ALM_CHAR) ||
          // LRM and RLM differ only in bit 0, so clearing that bit matches
          // both. The mask keeps all other 32 bits so that values above
          // 0xFFFFFF cannot alias onto LRM/RLM.
          (aChar & 0xfffffffe) == LRM_CHAR);
}
|
|
|
|
/**
|
|
* Give a UTF-32 codepoint
|
|
* Return true if the codepoint is a Bidi control character that may result
|
|
* in RTL directionality and therefore needs to trigger bidi resolution;
|
|
* return false otherwise.
|
|
*/
|
|
inline bool IsBidiControlRTL(uint32_t aChar) {
|
|
return aChar == RLM_CHAR ||
|
|
aChar == RLE_CHAR ||
|
|
aChar == RLO_CHAR ||
|
|
aChar == RLI_CHAR ||
|
|
aChar == ALM_CHAR;
|
|
}
|
|
|
|
/**
 * Given a 16-bit (UTF-16) text buffer and length
 * @param aText is the text buffer
 * @param aLength is the length of aText
 * @return true if the string contains right-to-left characters
 */
bool HasRTLChars(const char16_t* aText, uint32_t aLength);
|
|
|
|
/**
|
|
* Convenience function to call the above on an nsAString.
|
|
*/
|
|
inline bool HasRTLChars(const nsAString& aString) {
|
|
return HasRTLChars(aString.BeginReading(), aString.Length());
|
|
}
|
|
|
|
// ------------------
// These values are shared with the Preferences dialog.
// If the pref values are changed in the XUL file of Prefs,
// they must be changed here too.
// ------------------
//
#define IBMBIDI_TEXTDIRECTION_STR "bidi.direction"
#define IBMBIDI_TEXTTYPE_STR      "bidi.texttype"
#define IBMBIDI_NUMERAL_STR       "bidi.numeral"
|
|
|
|
// ------------------
//  Text Direction
// ------------------
// bidi.direction
// (* = the value used in IBMBIDI_DEFAULT_BIDI_OPTIONS)
#define IBMBIDI_TEXTDIRECTION_LTR 1 //  1 = directionLTRBidi *
#define IBMBIDI_TEXTDIRECTION_RTL 2 //  2 = directionRTLBidi
|
|
// ------------------
//  Text Type
// ------------------
// bidi.texttype
// (* = the value used in IBMBIDI_DEFAULT_BIDI_OPTIONS)
#define IBMBIDI_TEXTTYPE_CHARSET 1 //  1 = charsettexttypeBidi *
#define IBMBIDI_TEXTTYPE_LOGICAL 2 //  2 = logicaltexttypeBidi
#define IBMBIDI_TEXTTYPE_VISUAL  3 //  3 = visualtexttypeBidi
|
|
// ------------------
//  Numeral Style
// ------------------
// bidi.numeral
// (* = the value used in IBMBIDI_DEFAULT_BIDI_OPTIONS)
#define IBMBIDI_NUMERAL_NOMINAL        0 //  0 = nominalnumeralBidi *
#define IBMBIDI_NUMERAL_REGULAR        1 //  1 = regularcontextnumeralBidi
#define IBMBIDI_NUMERAL_HINDICONTEXT   2 //  2 = hindicontextnumeralBidi
#define IBMBIDI_NUMERAL_ARABIC         3 //  3 = arabicnumeralBidi
#define IBMBIDI_NUMERAL_HINDI          4 //  4 = hindinumeralBidi
#define IBMBIDI_NUMERAL_PERSIANCONTEXT 5 //  5 = persiancontextnumeralBidi
#define IBMBIDI_NUMERAL_PERSIAN        6 //  6 = persiannumeralBidi
|
|
|
|
/*
 * Default packed bidi options: LTR direction in bits 0-3, charset text type
 * in bits 4-7, nominal numerals in bits 8-11.
 */
#define IBMBIDI_DEFAULT_BIDI_OPTIONS        \
        ((IBMBIDI_TEXTDIRECTION_LTR<<0)   | \
         (IBMBIDI_TEXTTYPE_CHARSET<<4)    | \
         (IBMBIDI_NUMERAL_NOMINAL<<8))
|
|
|
|
/* Extract one 4-bit field from a packed bidi options value. */
#define GET_BIDI_OPTION_DIRECTION(bo) (((bo)>>0) & 0x0000000F) /* 4 bits for DIRECTION */
#define GET_BIDI_OPTION_TEXTTYPE(bo)  (((bo)>>4) & 0x0000000F) /* 4 bits for TEXTTYPE */
#define GET_BIDI_OPTION_NUMERAL(bo)   (((bo)>>8) & 0x0000000F) /* 4 bits for NUMERAL */
|
|
|
|
/*
 * Store the low 4 bits of the second argument into one field of the packed
 * bidi options lvalue |bo|, leaving the other bits untouched.
 *
 * Each macro expands to a single do { ... } while (0) statement so that it
 * can be used safely as the body of an unbraced if/else; it must be
 * followed by a semicolon.
 */
#define SET_BIDI_OPTION_DIRECTION(bo, dir) \
  do { (bo) = ((bo) & 0xFFFFFFF0) | (((dir) & 0x0000000F) << 0); } while (0)
#define SET_BIDI_OPTION_TEXTTYPE(bo, tt) \
  do { (bo) = ((bo) & 0xFFFFFF0F) | (((tt) & 0x0000000F) << 4); } while (0)
#define SET_BIDI_OPTION_NUMERAL(bo, num) \
  do { (bo) = ((bo) & 0xFFFFF0FF) | (((num) & 0x0000000F) << 8); } while (0)
|
|
|
|
/* Constants related to the position of numerics in the codepage */
#define START_HINDI_DIGITS  0x0660
#define END_HINDI_DIGITS    0x0669
#define START_ARABIC_DIGITS 0x0030
#define END_ARABIC_DIGITS   0x0039
#define START_FARSI_DIGITS  0x06f0
#define END_FARSI_DIGITS    0x06f9

/* Inclusive range checks against the digit ranges defined above. */
#define IS_HINDI_DIGIT(u)  ( ( (u) >= START_HINDI_DIGITS )  && ( (u) <= END_HINDI_DIGITS ) )
#define IS_ARABIC_DIGIT(u) ( ( (u) >= START_ARABIC_DIGITS ) && ( (u) <= END_ARABIC_DIGITS ) )
#define IS_FARSI_DIGIT(u)  ( ( (u) >= START_FARSI_DIGITS )  && ( (u) <= END_FARSI_DIGITS ) )
|
|
/**
 * Arabic numeric separator and numeric formatting characters:
 *  U+0600;ARABIC NUMBER SIGN
 *  U+0601;ARABIC SIGN SANAH
 *  U+0602;ARABIC FOOTNOTE MARKER
 *  U+0603;ARABIC SIGN SAFHA
 *  U+066A;ARABIC PERCENT SIGN
 *  U+066B;ARABIC DECIMAL SEPARATOR
 *  U+066C;ARABIC THOUSANDS SEPARATOR
 *  U+06DD;ARABIC END OF AYAH
 *
 * The lower bound of the first range is checked explicitly so that the
 * macro gives the right answer for any input, not only for values already
 * known to be at or above U+0600.
 */
#define IS_ARABIC_SEPARATOR(u) ( ( (u) >= 0x0600 && (u) <= 0x0603 ) || \
                                 ( (u) >= 0x066A && (u) <= 0x066C ) || \
                                 ( (u) == 0x06DD ) )
|
|
|
|
/*
 * True when |u| falls in one of the listed ranges or single codepoints
 * within U+0591..U+05C4 and U+064B..U+06ED.
 */
#define IS_BIDI_DIACRITIC(u) ( \
  ( (u) >= 0x0591 && (u) <= 0x05A1) || ( (u) >= 0x05A3 && (u) <= 0x05B9) \
    || ( (u) >= 0x05BB && (u) <= 0x05BD) || ( (u) == 0x05BF) || ( (u) == 0x05C1) \
    || ( (u) == 0x05C2) || ( (u) == 0x05C4) \
    || ( (u) >= 0x064B && (u) <= 0x0652) || ( (u) == 0x0670) \
    || ( (u) >= 0x06D7 && (u) <= 0x06E4) || ( (u) == 0x06E7) || ( (u) == 0x06E8) \
    || ( (u) >= 0x06EA && (u) <= 0x06ED) )
|
|
|
|
/* True for U+0590..U+05FF and for U+FB1D..U+FB4F (both ranges inclusive). */
#define IS_HEBREW_CHAR(c) (((0x0590 <= (c)) && ((c) <= 0x05FF)) || \
                           (((c) >= 0xfb1d) && ((c) <= 0xfb4f)))
|
|
/*
 * True for U+0600..U+06FF, U+0750..U+077F and U+08A0..U+08FF
 * (all ranges inclusive).
 */
#define IS_ARABIC_CHAR(c) ( (0x0600 <= (c) && (c) <= 0x08FF) && \
                            ( (c) <= 0x06ff ||                  \
                              ((c) >= 0x0750 && (c) <= 0x077f) || \
                              (c) >= 0x08a0 ) )
|
|
/*
 * True for characters accepted by IS_ARABIC_CHAR that are not a Hindi
 * digit, a Farsi digit, or an Arabic separator.
 */
#define IS_ARABIC_ALPHABETIC(c) (IS_ARABIC_CHAR(c) && \
                                !(IS_HINDI_DIGIT(c) || IS_FARSI_DIGIT(c) || IS_ARABIC_SEPARATOR(c)))
|
|
|
|
/**
 * The codepoint ranges in the following macros are based on the blocks
 * allocated, or planned to be allocated, to right-to-left characters in the
 * BMP (Basic Multilingual Plane) and SMP (Supplementary Multilingual Plane)
 * according to
 * http://unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt and
 * http://www.unicode.org/roadmaps/
 */

#define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
#define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
                                     ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
#define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
                                ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
/*
 * For a single UTF-16 code unit. Supplementary-plane characters appear as
 * surrogate pairs, so the lead surrogates of both SMP RTL ranges are
 * accepted: 0xD802/0xD803 lead U+10800..U+10FFF and 0xD83A/0xD83B lead
 * U+1E800..U+1EFFF.
 */
#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) ||      \
                              (IS_RTL_PRESENTATION_FORM(c)) || \
                              (c) == 0xD802 || (c) == 0xD803 || \
                              (c) == 0xD83A || (c) == 0xD83B)
/* For a full UTF-32 codepoint. */
#define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) ||      \
                               (IS_RTL_PRESENTATION_FORM(c)) || \
                               (IS_IN_SMP_RTL_BLOCK(c)))
|
|
#endif /* nsBidiUtils_h__ */
|