diff --git a/layout/generic/nsTextTransformer.cpp b/layout/generic/nsTextTransformer.cpp index dfbe7096f23..edc03e1ae3b 100644 --- a/layout/generic/nsTextTransformer.cpp +++ b/layout/generic/nsTextTransformer.cpp @@ -59,7 +59,7 @@ #include "nsLayoutAtoms.h" #endif - +nsICaseConversion* nsTextTransformer::gCaseConv = nsnull; PRBool nsTextTransformer::sWordSelectListenerPrefChecked = PR_FALSE; PRBool nsTextTransformer::sWordSelectEatSpaceAfter = PR_FALSE; PRBool nsTextTransformer::sWordSelectStopAtPunctuation = PR_FALSE; @@ -120,8 +120,6 @@ nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead) //---------------------------------------------------------------------- -static nsICaseConversion* gCaseConv = nsnull; - nsresult nsTextTransformer::Initialize() { @@ -142,7 +140,9 @@ nsTextTransformer::Initialize() return NS_OK; } -static nsresult EnsureCaseConv() + +nsresult +nsTextTransformer::EnsureCaseConv() { nsresult res = NS_OK; if (!gCaseConv) { @@ -153,6 +153,13 @@ static nsresult EnsureCaseConv() return res; } +nsICaseConversion* +nsTextTransformer::GetCaseConv() +{ + EnsureCaseConv(); + return gCaseConv; +} + void nsTextTransformer::Shutdown() { diff --git a/layout/generic/nsTextTransformer.h b/layout/generic/nsTextTransformer.h index 21b4bdd07ac..e69de29bb2d 100644 --- a/layout/generic/nsTextTransformer.h +++ b/layout/generic/nsTextTransformer.h @@ -1,442 +0,0 @@ -/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ -/* ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Mozilla Communicator client code. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1998 - * the Initial Developer. All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either of the GNU General Public License Version 2 or later (the "GPL"), - * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - * ***** END LICENSE BLOCK ***** */ - -/* - * class for transformation of text before rendering, including CSS - * text-transform - */ - -#ifndef nsTextTransformer_h___ -#define nsTextTransformer_h___ - -#include "nsTextFragment.h" -#include "nsISupports.h" -#include "nsPresContext.h" -#include "nsIObserver.h" -#ifdef IBMBIDI -#include "nsBidi.h" -#include "nsBidiUtils.h" -#endif - -class nsIContent; -class nsIFrame; -class nsILineBreaker; -class nsIWordBreaker; - -// XXX I'm sure there are other special characters -#define CH_NBSP 160 -#define CH_ENSP 8194 // -#define CH_EMSP 8195 // -#define CH_THINSP 8291 // -#define CH_SHY 173 -#define CH_CJKSP 12288 // U+3000 IDEOGRAPHIC SPACE (CJK Full-Width Space) - -#ifdef IBMBIDI -#define CH_LRM 8206 // -#define CH_RLM 8207 // -#define CH_LRE 8234 // -#define CH_RLE 8235 // -#define CH_PDF 8236 // -#define CH_LRO 8237 // -#define CH_RLO 8238 // - -#define IS_BIDI_CONTROL(_ch) \ - (((_ch) >= CH_LRM && (_ch) <= CH_RLM) \ - || ((_ch) >= CH_LRE && (_ch) <= CH_RLO)) -#endif // IBMBIDI - -// For now, we have only a couple of characters to strip out. If we get -// any more, change this to use a bitset to lookup into. -// CH_SHY - soft hyphen (discretionary hyphen) -#ifdef IBMBIDI -// added BIDI formatting codes -#define IS_DISCARDED(_ch) \ - (((_ch) == CH_SHY) || ((_ch) == '\r') || IS_BIDI_CONTROL(_ch)) -#else -#define IS_DISCARDED(_ch) \ - (((_ch) == CH_SHY) || ((_ch) == '\r')) -#endif - -/* Unicode codepoints that could be the first of a lam alef pair converted to - * a lam alef ligature by ArabicShaping() - */ -#define IS_LAM(_ch) \ - (((_ch) == 0x0644) || /* ARABIC LETTER LAM */ \ - ((_ch) == 0xfedf) || /* ARABIC LETTER LAM INITIAL FORM */ \ - ((_ch) == 0xfee0)) /* ARABIC LETTER LAM MEDIAL FORM */ \ - -/* Unicode codepoints that could be the second of a lam alef pair converted to - * a lam alef ligature by ArabicShaping() - */ -#define IS_ALEF(_ch) \ - (((_ch) == 0x0622) || /* ARABIC LETTER ALEF WITH MADDA ABOVE */ \ - ((_ch) == 0x0623) || /* ARABIC LETTER ALEF WITH HAMZA ABOVE */ \ - ((_ch) == 0x0625) || /* ARABIC LETTER ALEF WITH HAMZA BELOW */ \ - ((_ch) == 0x0627) || /* ARABIC LETTER ALEF */ \ - ((_ch) == 0xfe82) || /* ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM */ \ - ((_ch) == 0xfe84) || /* ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM */ \ - ((_ch) == 0xfe88) || /* ARABIC LETTER ALEF WITH HAMZA BELOW FINAL FORM */ \ - ((_ch) == 0xfe8e)) /* ARABIC LETTER ALEF FINAL FORM */ - -/* Unicode codepoints that could have been converted from a lam alef pair to a - * lam alef ligature by Arabic Shaping() - */ -#define IS_LAMALEF(_ch) (((_ch) >= 0xfef5) && ((_ch) <= 0xfefc)) - /* FEF5 ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM */ - /* FEF6 ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM */ - /* FEF7 ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM */ - /* FEF8 ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM */ - /* FEF9 ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM */ - /* FEFA ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM */ - /* FEFB ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM */ - /* FEFC;ARABIC LIGATURE LAM WITH ALEF FINAL FORM */ - -#define IS_ASCII_CHAR(ch) ((ch&0xff80) == 0) - -#define NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE 128 // used to be 256 - -// Indicates whether the transformed text should be left as ascii -#define NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII 1 - -// If at any point during GetNextWord or GetPrevWord we -// run across a multibyte (> 127) unicode character. -#define NS_TEXT_TRANSFORMER_HAS_MULTIBYTE 2 - -// The text in the transform buffer is ascii -#define NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII 4 - -#ifdef IBMBIDI -// The text in the transform buffer needs Arabic shaping -#define NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING 8 - -// The text in the transform buffer needs numeric shaping -#define NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING 16 - -// The frame containing the text was resolved as right-to-left by the -// Bidi Algorithm -#define NS_TEXT_TRANSFORMER_FRAME_IS_RTL 32 -#endif - -// A growable text buffer that tries to avoid using malloc by having a -// builtin buffer. Ideally used as an automatic variable. -class nsAutoTextBuffer { -public: - nsAutoTextBuffer(); - ~nsAutoTextBuffer(); - - nsresult GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead = PR_TRUE); - - nsresult GrowTo(PRInt32 aNewSize, PRBool aCopyToHead = PR_TRUE); - - PRUnichar* GetBuffer() { return mBuffer; } - PRUnichar* GetBufferEnd() { return mBuffer + mBufferLen; } - PRInt32 GetBufferLength() const { return mBufferLen; } - - PRUnichar* mBuffer; - PRInt32 mBufferLen; - PRUnichar mAutoBuffer[NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE]; -}; - -//---------------------------------------- - -/** - * This object manages the transformation of text: - * - * - * - * Note that no transformations are applied that would impact word - * breaking (like mapping   into space, for example). In - * addition, this logic will not strip leading or trailing whitespace - * (across the entire run of text; leading whitespace can be skipped - * for a frames text because of whitespace compression). - */ -class nsTextTransformer { -public: - // Note: The text transformer does not hold a reference to the line - // breaker and work breaker objects - nsTextTransformer(nsPresContext* aPresContext); - - ~nsTextTransformer(); - - /** - * Initialize the text transform. Use GetNextWord() and GetPrevWord() - * to iterate the text - * - * The default is to transform all text to Unicode; however, you can - * specify that the text should be left as ascii if possible. Note that - * we don't step the text down from Unicode to ascii (even if it doesn't - * contain multibyte characters) so this only happens for text fragments - * that contain 1-byte text. - * XXX This is currently not implemented for GetPreviousWord() - * @see TransformedTextIsAscii() - */ - nsresult Init(nsIFrame* aFrame, - nsIContent* aContent, - PRInt32 aStartingOffset, - PRBool aForceArabicShaping = PR_FALSE, - PRBool aLeaveAsAscii = PR_FALSE); - - PRInt32 GetContentLength() const { - return mFrag ? mFrag->GetLength() : 0; - } - - PRUnichar GetContentCharAt(PRInt32 aIndex) { - return (mFrag && aIndex < mFrag->GetLength()) ? mFrag->CharAt(aIndex) : 0; - } - - /** - * Iterates the next word in the text fragment. - * - * Returns a pointer to the word, the number of characters in the word, the - * content length of the word, whether it is whitespace, and whether the - * text was transformed (any of the transformations listed above). The content - * length can be greater than the word length if whitespace compression occurred - * or if characters were discarded - * - * The default behavior is to reset the transform buffer to the beginning, - * but you can choose to not reste it and buffer across multiple words - */ - PRUnichar* GetNextWord(PRBool aInWord, - PRInt32* aWordLenResult, - PRInt32* aContentLenResult, - PRBool* aIsWhitespaceResult, - PRBool* aWasTransformed, - PRBool aResetTransformBuf = PR_TRUE, - PRBool aForLineBreak = PR_TRUE, - PRBool aIsKeyboardSelect = PR_FALSE); - - PRUnichar* GetPrevWord(PRBool aInWord, - PRInt32* aWordLenResult, - PRInt32* aContentLenResult, - PRBool* aIsWhitespaceResult, - PRBool aForLineBreak = PR_TRUE, - PRBool aIsKeyboardSelect = PR_FALSE); - - - // Returns PR_TRUE if the LEAVE_AS_ASCII flag is set - PRBool LeaveAsAscii() const { - return (mFlags & NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII) != 0; - } - - // Returns PR_TRUE if any of the characters are multibyte (greater than 127) - PRBool HasMultibyte() const { - return (mFlags & NS_TEXT_TRANSFORMER_HAS_MULTIBYTE) != 0; - } - - // Returns PR_TRUE if the text in the transform bufer is ascii (i.e., it - // doesn't contain any multibyte characters) - PRBool TransformedTextIsAscii() const { - return (mFlags & NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII) != 0; - } - -#ifdef IBMBIDI - // Returns PR_TRUE if the text in the transform bufer needs Arabic - // shaping - PRBool NeedsArabicShaping() const { - return (mFlags & NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING) != 0; - } - - // Returns PR_TRUE if the text in the transform bufer needs numeric - // shaping - PRBool NeedsNumericShaping() const { - return (mFlags & NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING) != 0; - } - - // Returns PR_TRUE if the frame containing the text is right-to-left - PRBool FrameIsRTL() const { - return (mFlags & NS_TEXT_TRANSFORMER_FRAME_IS_RTL) != 0; - } -#endif - - // Set or clears the LEAVE_AS_ASCII bit - void SetLeaveAsAscii(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII : - mFlags &= (~NS_TEXT_TRANSFORMER_LEAVE_AS_ASCII); - } - - // Set or clears the NS_TEXT_TRANSFORMER_HAS_MULTIBYTE bit - void SetHasMultibyte(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_HAS_MULTIBYTE : - mFlags &= (~NS_TEXT_TRANSFORMER_HAS_MULTIBYTE); - } - - // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII bit - void SetTransformedTextIsAscii(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII : - mFlags &= (~NS_TEXT_TRANSFORMER_TRANSFORMED_TEXT_IS_ASCII); - } - -#ifdef IBMBIDI - // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_DO_ARABIC_SHAPING bit - void SetNeedsArabicShaping(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING : - mFlags &= (~NS_TEXT_TRANSFORMER_DO_ARABIC_SHAPING); - } - - // Set or clears the NS_TEXT_TRANSFORMER_TRANSFORMED_DO_NUMERIC_SHAPING bit - void SetNeedsNumericShaping(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING : - mFlags &= (~NS_TEXT_TRANSFORMER_DO_NUMERIC_SHAPING); - } - - // Set or clears the NS_TEXT_TRANSFORMER_FRAME_IS_RTL bit - void SetFrameIsRTL(PRBool aValue) { - aValue ? mFlags |= NS_TEXT_TRANSFORMER_FRAME_IS_RTL: - mFlags &= (~NS_TEXT_TRANSFORMER_FRAME_IS_RTL); - } -#endif - - PRUnichar* GetWordBuffer() { - return mTransformBuf.GetBuffer(); - } - - PRInt32 GetWordBufferLength() const { - return mTransformBuf.GetBufferLength(); - } - - static PRBool GetWordSelectEatSpaceAfter() { - return sWordSelectEatSpaceAfter; - } - - static PRBool GetWordSelectStopAtPunctuation() { - return sWordSelectStopAtPunctuation; - } - - static nsresult Initialize(); - static void Shutdown(); - -protected: - // Helper methods for GetNextWord (F == forwards) - PRInt32 ScanNormalWhiteSpace_F(PRInt32 aFragLen); - PRInt32 ScanNormalAsciiText_F(PRInt32 aFragLen, - PRInt32* aWordLen, - PRBool* aWasTransformed); - PRInt32 ScanNormalAsciiText_F_ForWordBreak(PRInt32 aFragLen, - PRInt32* aWordLen, - PRBool* aWasTransformed, - PRBool aIsKeyboardSelect); - PRInt32 ScanNormalUnicodeText_F(PRInt32 aFragLen, - PRBool aForLineBreak, - PRInt32* aWordLen, - PRBool* aWasTransformed); - PRInt32 ScanPreWrapWhiteSpace_F(PRInt32 aFragLen, - PRInt32* aWordLen); - PRInt32 ScanPreAsciiData_F(PRInt32 aFragLen, - PRInt32* aWordLen, - PRBool* aWasTransformed); - PRInt32 ScanPreData_F(PRInt32 aFragLen, - PRInt32* aWordLen, - PRBool* aWasTransformed); - - // Helper methods for GetPrevWord (B == backwards) - PRInt32 ScanNormalWhiteSpace_B(); - PRInt32 ScanNormalAsciiText_B(PRInt32* aWordLen, PRBool aIsKeyboardSelect); - PRInt32 ScanNormalUnicodeText_B(PRBool aForLineBreak, PRInt32* aWordLen); - PRInt32 ScanPreWrapWhiteSpace_B(PRInt32* aWordLen); - PRInt32 ScanPreData_B(PRInt32* aWordLen); - - // Converts the current text in the transform buffer from ascii to - // Unicode - void ConvertTransformedTextToUnicode(); - - void LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen, - PRBool* aWasTransformed); - - void DoArabicShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed); - - void DoNumericShaping(PRUnichar* aText, PRInt32& aTextLength, PRBool* aWasTransformed); - - // The text fragment that we are looking at - const nsTextFragment* mFrag; - - // Our current offset into the text fragment - PRInt32 mOffset; - - // The frame's white-space mode we are using to process text - enum { - eNormal, - ePreformatted, - ePreWrap - } mMode; - - nsLanguageSpecificTransformType mLanguageSpecificTransformType; - -#ifdef IBMBIDI - nsPresContext* mPresContext; - nsCharType mCharType; -#endif - - // Buffer used to hold the transformed words from GetNextWord or - // GetPrevWord - nsAutoTextBuffer mTransformBuf; - - // Our current position within the buffer. Used when iterating the next - // word, because we may be requested to buffer across multiple words - PRInt32 mBufferPos; - - // The frame's text-transform state - PRUint8 mTextTransform; - - // Flag for controlling mLeaveAsAscii, mHasMultibyte, mTransformedTextIsAscii - PRUint8 mFlags; - - // prefs used to configure the double-click word selection behavior - static int WordSelectPrefCallback(const char* aPref, void* aClosure); - static PRBool sWordSelectListenerPrefChecked; // have we read the prefs yet? - static PRBool sWordSelectEatSpaceAfter; // should we include whitespace up to next word? - static PRBool sWordSelectStopAtPunctuation; // should we stop at punctuation? - -#ifdef DEBUG - static void SelfTest(nsPresContext* aPresContext); - - nsresult Init2(const nsTextFragment* aFrag, - PRInt32 aStartingOffset, - PRUint8 aWhiteSpace, - PRUint8 aTextTransform); -#endif -}; - -#endif /* nsTextTransformer_h___ */