pjs/layout/generic/nsTextTransformer.cpp

1853 строки
55 KiB
C++
Исходник Обычный вид История

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
1998-10-20 04:17:17 +04:00
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
1998-10-20 04:17:17 +04:00
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
1998-10-20 04:17:17 +04:00
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
2000-04-12 19:52:50 +04:00
#include <ctype.h>
#include "nsCOMPtr.h"
1998-10-20 04:17:17 +04:00
#include "nsTextTransformer.h"
#include "nsContentUtils.h"
1998-10-20 04:17:17 +04:00
#include "nsIContent.h"
#include "nsIFrame.h"
#include "nsITextContent.h"
#include "nsStyleConsts.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
#include "nsICaseConversion.h"
#include "prenv.h"
#ifdef IBMBIDI
#include "nsLayoutAtoms.h"
#endif
// Word-selection preference state shared by every nsTextTransformer.
// Becomes PR_TRUE once Initialize() has read the prefs and registered
// WordSelectPrefCallback for them.
PRBool nsTextTransformer::sWordSelectListenerPrefChecked = PR_FALSE;
// Cached value of the "layout.word_select.eat_space_to_next_word" pref.
PRBool nsTextTransformer::sWordSelectEatSpaceAfter = PR_FALSE;
// Cached value of the "layout.word_select.stop_at_punctuation" pref.
PRBool nsTextTransformer::sWordSelectStopAtPunctuation = PR_FALSE;
// Names of the two preferences cached above.
static const char kWordSelectEatSpaceAfterPref[] = "layout.word_select.eat_space_to_next_word";
static const char kWordSelectStopAtPunctuationPref[] = "layout.word_select.stop_at_punctuation";
// static
int
nsTextTransformer::WordSelectPrefCallback(const char* aPref, void* aClosure)
{
sWordSelectEatSpaceAfter = nsContentUtils::GetBoolPref(kWordSelectEatSpaceAfterPref);
sWordSelectStopAtPunctuation = nsContentUtils::GetBoolPref(kWordSelectStopAtPunctuationPref);
return 0;
}
// Construct a buffer that initially uses its built-in mAutoBuffer storage,
// with a capacity of NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE characters.
nsAutoTextBuffer::nsAutoTextBuffer()
: mBuffer(mAutoBuffer),
mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
{
}
nsAutoTextBuffer::~nsAutoTextBuffer()
{
  // Only storage obtained from new[] is freed; the built-in mAutoBuffer is
  // never deleted. A null mBuffer needs no separate test because delete[]
  // on a null pointer does nothing.
  if (mBuffer != mAutoBuffer) {
    delete [] mBuffer;
  }
}
nsresult
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
{
  // Growth policy: double the capacity, unless doubling would not provide
  // aAtLeast additional characters; in that case grow by aAtLeast plus 100
  // characters of slack. The actual reallocation is done by GrowTo().
  const PRInt32 doubled = mBufferLen * 2;
  const PRInt32 required = mBufferLen + aAtLeast;
  const PRInt32 target = (doubled < required) ? (required + 100) : doubled;
  return GrowTo(target, aCopyToHead);
}
// Ensure the buffer can hold at least aNewSize characters.
//
// Nothing happens when the buffer is already that large. Otherwise a new
// array is allocated and the old contents (all mBufferLen characters) are
// copied into it: at index 0 when aCopyToHead is true, otherwise starting
// at index <old mBufferLen>. In the latter case the characters in front of
// the copied data are left uninitialized.
//
// Returns NS_OK on success (or when no growth was needed) and
// NS_ERROR_OUT_OF_MEMORY when the allocation yields a null pointer; the
// buffer is left untouched in that case.
nsresult
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
{
  if (aNewSize <= mBufferLen) {
    return NS_OK;
  }

  // Where the old contents land inside the new array.
  const PRInt32 destOffset = aCopyToHead ? 0 : mBufferLen;

  // When copying away from the head, the old data occupies
  // [destOffset, destOffset + mBufferLen). Make sure the new array is big
  // enough for that; previously a request smaller than twice the old
  // length made the memcpy below write past the end of the allocation.
  if (aNewSize < destOffset + mBufferLen) {
    aNewSize = destOffset + mBufferLen;
  }

  PRUnichar* newBuffer = new PRUnichar[aNewSize];
  if (!newBuffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  memcpy(newBuffer + destOffset, mBuffer, sizeof(PRUnichar) * mBufferLen);

  // The built-in auto buffer is part of this object and must not be freed.
  if (mBuffer != mAutoBuffer) {
    delete [] mBuffer;
  }
  mBuffer = newBuffer;
  mBufferLen = aNewSize;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class ID passed to CallGetService() when fetching the case-conversion
// service.
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
// Case-conversion service. Obtained on demand by EnsureCaseConv() and
// released in nsTextTransformer::Shutdown().
static nsICaseConversion* gCaseConv = nsnull;
nsresult
nsTextTransformer::Initialize()
{
  // The word-selection prefs are read, and their observers registered,
  // only the first time through.
  if (sWordSelectListenerPrefChecked) {
    return NS_OK;
  }
  sWordSelectListenerPrefChecked = PR_TRUE;

  // Take an initial snapshot of both prefs...
  sWordSelectEatSpaceAfter =
    nsContentUtils::GetBoolPref(kWordSelectEatSpaceAfterPref);
  sWordSelectStopAtPunctuation =
    nsContentUtils::GetBoolPref(kWordSelectStopAtPunctuationPref);

  // ...and ask to be called back when either of them changes.
  nsContentUtils::RegisterPrefCallback(kWordSelectEatSpaceAfterPref,
                                       WordSelectPrefCallback, nsnull);
  nsContentUtils::RegisterPrefCallback(kWordSelectStopAtPunctuationPref,
                                       WordSelectPrefCallback, nsnull);
  return NS_OK;
}
// Make sure gCaseConv holds the case-conversion service, fetching it on
// the first call. Returns NS_OK when the service was already cached,
// otherwise the result of CallGetService().
static nsresult EnsureCaseConv()
{
  if (gCaseConv) {
    return NS_OK;
  }

  nsresult res = CallGetService(kUnicharUtilCID, &gCaseConv);
  NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
  NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
  return res;
}
// Counterpart of Initialize(): unregisters the word-selection pref
// callbacks and drops the cached case-conversion service.
void
nsTextTransformer::Shutdown()
{
// Remove the observers that Initialize() registered for the two prefs.
nsContentUtils::UnregisterPrefCallback(kWordSelectEatSpaceAfterPref,
WordSelectPrefCallback, nsnull);
nsContentUtils::UnregisterPrefCallback(kWordSelectStopAtPunctuationPref,
WordSelectPrefCallback, nsnull);
// Release the service reference held in gCaseConv, if there is one.
NS_IF_RELEASE(gCaseConv);
}
// IS_DISCARDED(ch) is true for characters that the scanners in this file
// strip from the transformed output instead of copying.
//
// For now, we have only a couple of characters to strip out. If we get
// any more, change this to use a bitset to lookup into.
// CH_SHY - soft hyphen (discretionary hyphen)
// '\r'   - carriage return
// Note: the macro evaluates its argument more than once, so do not pass
// an expression with side effects.
#ifdef IBMBIDI
// added BIDI formatting codes
#define IS_DISCARDED(_ch) \
(((_ch) == CH_SHY) || ((_ch) == '\r') || IS_BIDI_CONTROL(_ch))
#else
#define IS_DISCARDED(_ch) \
(((_ch) == CH_SHY) || ((_ch) == '\r'))
#endif
1998-10-20 04:17:17 +04:00
// Highest character code that the scanners treat as single-byte text;
// any character above it makes them call SetHasMultibyte(PR_TRUE).
#define MAX_UNIBYTE 127
MOZ_DECL_CTOR_COUNTER(nsTextTransformer)
// Build a transformer around the given line and word breakers. The
// transformer starts in eNormal mode with no text fragment attached; call
// Init() before scanning. At least one of the breakers is expected to be
// non-null (asserted below).
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
                                     nsIWordBreaker* aWordBreaker,
                                     nsPresContext* aPresContext)
  : mFrag(nsnull),
    mOffset(0),
    mMode(eNormal),
    mLineBreaker(aLineBreaker),
    mWordBreaker(aWordBreaker),
    mBufferPos(0),
    mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE),
    mFlags(0)
{
  MOZ_COUNT_CTOR(nsTextTransformer);

  // The language-specific transform type comes from the pres context.
  mLanguageSpecificTransformType =
    aPresContext->LanguageSpecificTransformType();

#ifdef IBMBIDI
  // Kept for the bidi queries made in Init().
  mPresContext = aPresContext;
#endif

  if (aLineBreaker == nsnull && aWordBreaker == nsnull ) {
    NS_ASSERTION(0, "invalid creation of nsTextTransformer");
  }

#ifdef DEBUG
  // Debug builds run the self test once, when the first transformer is
  // constructed.
  static PRBool selfTestPending = PR_TRUE;
  if (selfTestPending) {
    selfTestPending = PR_FALSE;
    SelfTest(aLineBreaker, aWordBreaker, aPresContext);
  }
#endif
}
nsTextTransformer::~nsTextTransformer()
{
  // Nothing to release explicitly here; this only balances the
  // constructor's MOZ_COUNT_CTOR.
  MOZ_COUNT_DTOR(nsTextTransformer);
}
// Attach the transformer to a frame's text content and pick up the style
// information that controls scanning.
//
//   aFrame              frame whose text style (white-space mode and
//                       text-transform) is read
//   aContent            content node; must support nsITextContent
//   aStartingOffset     offset in the text fragment to start scanning at;
//                       clamped to [0, fragment length]
//   aForceArabicShaping when true, Arabic shaping is requested for Arabic
//                       frames even if the platform shapes natively
//   aLeaveAsAscii       request that 1-byte text be kept as 1-byte text
//                       in the transform buffer when possible
//
// Returns the result of the QueryInterface to nsITextContent. When that
// fails, the fragment, offset and style state are left unchanged.
nsresult
nsTextTransformer::Init(nsIFrame* aFrame,
                        nsIContent* aContent,
                        PRInt32 aStartingOffset,
                        PRBool aForceArabicShaping,
                        PRBool aLeaveAsAscii)
{
#ifdef IBMBIDI
  // mPresContext is only assigned (and nsLayoutAtoms only included) in
  // IBMBIDI builds, so the bidi set-up must be guarded the same way.
  /*
   * If the document has Bidi content, check whether we need to do
   * Arabic shaping.
   *
   * Does the frame contain Arabic characters
   * (mCharType == eCharType_RightToLeftArabic)?
   * Are we rendering character by character (aForceArabicShaping ==
   * PR_TRUE)? If so, we always do our own Arabic shaping, even if
   * the platform has native shaping support. Otherwise, we only do
   * shaping if the platform has no shaping support.
   *
   * We do numeric shaping in all Bidi documents.
   */
  if (mPresContext->BidiEnabled()) {
    mCharType = (nsCharType)NS_PTR_TO_INT32(
      mPresContext->PropertyTable()->GetProperty(aFrame,
                                                 nsLayoutAtoms::charType));
    if (mCharType == eCharType_RightToLeftArabic &&
        (aForceArabicShaping || !mPresContext->IsBidiSystem())) {
      SetNeedsArabicShaping(PR_TRUE);
    }
    SetNeedsNumericShaping(PR_TRUE);
  }
#endif // IBMBIDI

  // Get the content's text fragment.
  nsresult rv;
  nsCOMPtr<nsITextContent> tc = do_QueryInterface(aContent, &rv);
  if (!tc.get()) {
    return rv;
  }
  mFrag = tc->Text();

  // Sanitize aStartingOffset so that it lies inside the fragment.
  if (aStartingOffset < 0) {
    NS_WARNING("bad starting offset");
    aStartingOffset = 0;
  }
  else if (aStartingOffset > mFrag->GetLength()) {
    NS_WARNING("bad starting offset");
    aStartingOffset = mFrag->GetLength();
  }
  mOffset = aStartingOffset;

  // Get the frame's text style information. mMode is only changed for the
  // two preformatted white-space values; otherwise it keeps its current
  // value.
  const nsStyleText* styleText = aFrame->GetStyleText();
  if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) {
    mMode = ePreformatted;
  }
  else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) {
    mMode = ePreWrap;
  }
  mTextTransform = styleText->mTextTransform;

  // XXX Currently we only leave it as ascii for normal text and not for
  // preformatted or preformatted wrapped text or language specific
  // transforms. We also never step down from Unicode (2-byte) to ascii.
  const PRBool canStayAscii =
    aLeaveAsAscii &&
    !mFrag->Is2b() &&
    (eNormal == mMode) &&
    (mLanguageSpecificTransformType == eLanguageSpecificTransformType_None);
  SetLeaveAsAscii(canStayAscii ? PR_TRUE : PR_FALSE);

  return rv;
}
//----------------------------------------------------------------------
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_F()
{
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();

  // Walk forward over the whole run of whitespace starting at mOffset.
  // Discarded characters inside the run are swallowed along with it, so
  // the scan stops only at a character that is neither.
  PRInt32 offset = mOffset;
  while (offset < fragLen) {
    const PRUnichar ch = frag->CharAt(offset);
    if (!XP_IS_SPACE(ch) && !IS_DISCARDED(ch)) {
      break;
    }
    ++offset;
  }

  // The whole run collapses into one space in the transform buffer.
  // NOTE(review): the result of GrowBy() is not checked here, so a failed
  // growth is followed by a write at mBufferPos anyway - confirm whether
  // that can happen in practice.
  if (mBufferPos >= mTransformBuf.mBufferLen) {
    mTransformBuf.GrowBy(128);
  }
  if (!TransformedTextIsAscii()) {
    mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
  } else {
    // In ascii mode the same storage is addressed byte by byte.
    unsigned char* bytes = (unsigned char*)mTransformBuf.mBuffer;
    bytes[mBufferPos++] = ' ';
  }

  // Content offset of the first character after the run.
  return offset;
}
void
nsTextTransformer::ConvertTransformedTextToUnicode()
{
// Go backwards over the characters and convert them.
PRInt32 lastChar = mBufferPos - 1;
unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar;
PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar;
NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos,
"transform buffer is too small");
for (PRInt32 count = mBufferPos; count > 0; count--) {
*cp2-- = PRUnichar(*cp1--);
}
}
1998-10-20 04:17:17 +04:00
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
// Scan forward over one word of 1-byte text, starting at mOffset, and
// append its transformed form to the transform buffer.
//
// The word ends at the first whitespace character or at the end of the
// fragment. NBSP is emitted as a plain space (and flags *aWasTransformed);
// discarded characters are skipped. The first character above MAX_UNIBYTE
// switches the buffer from 1-byte to PRUnichar storage, which also flags
// *aWasTransformed.
//
// On return *aWordLen is the number of characters appended; the return
// value is the content offset where scanning stopped.
PRInt32
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
                                         PRBool* aWasTransformed)
{
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;
  const unsigned char* text = (const unsigned char*)frag->Get1b();

  PRInt32 offset = mOffset;
  for (; offset < fragLen; offset++) {
    unsigned char ch = text[offset];
    if (XP_IS_SPACE(ch)) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      // Strip discarded characters from the transformed output
      continue;
    }

    if (ch > MAX_UNIBYTE) {
      // A non-ascii character: the output can no longer stay 1-byte.
      SetHasMultibyte(PR_TRUE);
      if (TransformedTextIsAscii()) {
        SetTransformedTextIsAscii(PR_FALSE);
        *aWasTransformed = PR_TRUE;
        // Widen whatever has been written so far.
        if (mBufferPos > 0) {
          ConvertTransformedTextToUnicode();
        }
      }
    }

    if (mBufferPos >= mTransformBuf.mBufferLen) {
      nsresult rv = mTransformBuf.GrowBy(128);
      if (NS_FAILED(rv)) {
        // If we run out of space then just truncate the text
        break;
      }
    }

    // Fetch the buffer afresh for every write: growing may have replaced
    // it, and the element width depends on the current ascii/unicode mode.
    PRUnichar* out = mTransformBuf.GetBuffer();
    if (TransformedTextIsAscii()) {
      ((unsigned char*)out)[mBufferPos] = ch;
    } else {
      out[mBufferPos] = PRUnichar(ch);
    }
    mBufferPos++;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
// Word-selection variant of ScanNormalAsciiText_F: scan forward over one
// "word" of 1-byte text for word-break (not line-break) purposes.
//
// Differences from the line-break scanner:
//  - an NBSP at the very start is emitted (as a space) as a one-character
//    word; an NBSP anywhere else ends the word;
//  - when sWordSelectStopAtPunctuation is set, the word also ends at the
//    boundary between alphanumeric and ascii punctuation characters.
//    With aIsKeyboardSelect, punctuation following alphanumerics is still
//    consumed and the word ends at the next alphanumeric instead.
//
// On return *aWordLen is the number of characters appended to the
// transform buffer; the return value is the content offset where scanning
// stopped.
PRInt32
nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen,
                                                      PRBool* aWasTransformed,
                                                      PRBool aIsKeyboardSelect)
{
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;
  const unsigned char* text = (const unsigned char*)frag->Get1b();
  PRInt32 offset = mOffset;

  // Only meaningful when sWordSelectStopAtPunctuation is set: are we
  // currently inside a run of alphanumerics? Characters with the high bit
  // set are always counted as alphanumeric, because isalnum() cannot be
  // trusted for them and we would otherwise get stuck on accented
  // letters. As a consequence we never stop at non-ascii punctuation,
  // which is fine: the punctuation we care about is 7-bit.
  PRBool inAlphaNumericRun = PR_TRUE;
  if (sWordSelectStopAtPunctuation && offset < fragLen) {
    const unsigned char first = text[offset];
    inAlphaNumericRun = isalnum(first) || !IS_ASCII_CHAR(first);
  }

  PRBool stopAfterThisChar = PR_FALSE;
  for (; offset < fragLen && !stopAfterThisChar; offset++) {
    unsigned char ch = text[offset];
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
      if (offset != mOffset) {
        break;
      }
      // Leading NBSP: emit it, then stop.
      stopAfterThisChar = PR_TRUE;
    }
    else if (XP_IS_SPACE(ch)) {
      break;
    }
    else if (sWordSelectStopAtPunctuation &&
             inAlphaNumericRun && !isalnum(ch) && IS_ASCII_CHAR(ch)) {
      if (!aIsKeyboardSelect) {
        break;
      }
      // For keyboard move-by-word, need to pass by at least
      // one alphanumeric char before stopping at punct
      inAlphaNumericRun = PR_FALSE;
    }
    else if (sWordSelectStopAtPunctuation &&
             !inAlphaNumericRun && (isalnum(ch) || !IS_ASCII_CHAR(ch))) {
      // On some platforms, punctuation breaks for word selection
      break;
    }
    else if (IS_DISCARDED(ch)) {
      // Strip discarded characters from the transformed output
      continue;
    }

    if (ch > MAX_UNIBYTE) {
      // A non-ascii character: the output can no longer stay 1-byte.
      SetHasMultibyte(PR_TRUE);
      if (TransformedTextIsAscii()) {
        SetTransformedTextIsAscii(PR_FALSE);
        *aWasTransformed = PR_TRUE;
        // Widen whatever has been written so far.
        if (mBufferPos > 0) {
          ConvertTransformedTextToUnicode();
        }
      }
    }

    if (mBufferPos >= mTransformBuf.mBufferLen) {
      nsresult rv = mTransformBuf.GrowBy(128);
      if (NS_FAILED(rv)) {
        // If we run out of space then just truncate the text
        break;
      }
    }

    // Fetch the buffer afresh for every write: growing may have replaced
    // it, and the element width depends on the current ascii/unicode mode.
    PRUnichar* out = mTransformBuf.GetBuffer();
    if (TransformedTextIsAscii()) {
      ((unsigned char*)out)[mBufferPos] = ch;
    } else {
      out[mBufferPos] = PRUnichar(ch);
    }
    mBufferPos++;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
const PRUnichar* cp0 = frag->Get2b();
PRInt32 fragLen = frag->GetLength();
#ifdef IBMBIDI
if (*aWordLen > 0 && *aWordLen < fragLen) {
fragLen = *aWordLen;
}
#endif
PRInt32 offset = mOffset;
PRUnichar firstChar = frag->CharAt(offset++);
#ifdef IBMBIDI
// Need to strip BIDI controls even when those are 'firstChars'.
// This doesn't seem to produce bug 14280 (or similar bugs).
while (offset < fragLen && IS_BIDI_CONTROL(firstChar) ) {
firstChar = frag->CharAt(offset++);
}
#endif // IBMBIDI
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
// Only evaluate complex breaking logic if there are more characters
// beyond the first to look at.
PRInt32 numChars = 1;
if (offset < fragLen) {
const PRUnichar* cp = cp0 + offset;
PRBool breakBetween = PR_FALSE;
if (aForLineBreak) {
mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
else {
mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
// don't transform the first character until after BreakInBetween is called
// Kipp originally did this at the top of the function, which was too early.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
if (NS_FAILED(rv)) {
*aWordLen = 0;
return offset - 1;
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
if (!breakBetween) {
// Find next position
PRBool tryNextFrag;
PRUint32 next;
if (aForLineBreak) {
mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
1998-10-22 00:05:31 +04:00
}
else {
mWordBreaker->NextWord(cp0, fragLen, offset, &next, &tryNextFrag);
}
numChars = (PRInt32) (next - (PRUint32) offset) + 1;
// Since we know the number of characters we're adding grow the buffer
// now before we start copying
nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars);
if (NS_FAILED(rv)) {
numChars = mTransformBuf.GetBufferLength() - mBufferPos;
}
offset += numChars - 1;
// 1. convert nbsp into space
// 2. check for discarded characters
// 3. check mHasMultibyte flag
// 4. copy buffer
PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos];
const PRUnichar* end = cp + numChars - 1;
while (cp < end) {
PRUnichar ch = *cp++;
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
// Strip discarded characters from the transformed output
numChars--;
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
*bp++ = ch;
mBufferPos++;
1998-10-22 00:05:31 +04:00
}
}
1998-10-22 00:05:31 +04:00
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
else
{ // transform the first character
// we do this here, rather than at the top of the function (like Kipp originally had it)
// because if we must call BreakInBetween, then we must do so before the transformation
// this is the case where BreakInBetween does not need to be called at all.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
if (NS_FAILED(rv)) {
*aWordLen = 0;
return offset - 1;
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
}
*aWordLen = numChars;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
{
  // Scans a run of pre-wrap whitespace starting at mOffset and writes one
  // space to the transform buffer per whitespace character. Tabs and
  // newlines end the run; discarded characters are skipped without
  // producing output. *aWordLen receives the number of spaces written;
  // the return value is the content offset where scanning stopped.
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;
  PRUnichar* out = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* outEnd = mTransformBuf.GetBufferEnd();

  PRInt32 offset = mOffset;
  while (offset < fragLen) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    const PRUnichar ch = frag->CharAt(offset);

    const PRBool isRunSpace =
      XP_IS_SPACE(ch) && (ch != '\t') && (ch != '\n');
    if (!isRunSpace) {
      if (!IS_DISCARDED(ch)) {
        break;
      }
      // Keep looping if this is a discarded character
      ++offset;
      continue;
    }

    if (out == outEnd) {
      // Buffer is full: grow it and re-derive both pointers.
      const PRInt32 used = out - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBuffer() + used;
      outEnd = mTransformBuf.GetBufferEnd();
    }

    *out++ = ' ';
    mBufferPos++;
    ++offset;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen,
                                 PRBool* aWasTransformed)
{
  // Scans preformatted text starting at mOffset up to (not including) the
  // next tab or newline, copying it to the transform buffer. NBSP becomes
  // a space (flagging *aWasTransformed); discarded characters are dropped.
  // *aWordLen receives the number of characters written; the return value
  // is the content offset where scanning stopped.
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;
  PRUnichar* out = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* outEnd = mTransformBuf.GetBufferEnd();

  PRInt32 offset = mOffset;
  while (offset < fragLen) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    PRUnichar ch = frag->CharAt(offset);
    if (('\t' == ch) || ('\n' == ch)) {
      break;
    }

    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      ++offset;
      continue;
    }

    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

    if (out == outEnd) {
      // Buffer is full: grow it and re-derive both pointers.
      const PRInt32 used = out - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBuffer() + used;
      outEnd = mTransformBuf.GetBufferEnd();
    }

    *out++ = ch;
    mBufferPos++;
    ++offset;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen,
                                      PRBool* aWasTransformed)
{
  // 1-byte counterpart of ScanPreData_F: scans preformatted text starting
  // at mOffset up to (not including) the next tab or newline, widening
  // each character into the PRUnichar transform buffer. NBSP becomes a
  // space (flagging *aWasTransformed); discarded characters are dropped.
  // *aWordLen receives the number of characters written; the return value
  // is the content offset where scanning stopped.
  const nsTextFragment* frag = mFrag;
  PRUnichar* out = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* outEnd = mTransformBuf.GetBufferEnd();
  const unsigned char* text = (const unsigned char*) frag->Get1b();
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;

  PRInt32 offset = mOffset;
  while (offset < fragLen) {
    PRUnichar ch = (PRUnichar) text[offset];
    if (('\t' == ch) || ('\n' == ch)) {
      // The terminator itself is not consumed.
      break;
    }
    // From here on the character counts as consumed, even if the buffer
    // cannot be grown to hold it.
    ++offset;

    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }

    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

    if (out == outEnd) {
      // Buffer is full: grow it and re-derive both pointers.
      const PRInt32 used = out - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBuffer() + used;
      outEnd = mTransformBuf.GetBufferEnd();
    }

    *out++ = ch;
    mBufferPos++;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
//----------------------------------------
// Lower-case the first aWordLen bytes of aText in place, using tolower().
// Nothing happens when aWordLen is zero or negative.
static void
AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen)
{
  for (PRInt32 i = 0; i < aWordLen; ++i) {
    aText[i] = tolower(aText[i]);
  }
}
// Upper-case the first aWordLen bytes of aText in place, using toupper().
// Nothing happens when aWordLen is zero or negative.
static void
AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen)
{
  for (PRInt32 i = 0; i < aWordLen; ++i) {
    aText[i] = toupper(aText[i]);
  }
}
// U+00DF LATIN SMALL LETTER SHARP S (German "sz" ligature).
#define kSzlig 0x00DF

// Count how many of the first len characters of aText are kSzlig.
static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len)
{
  PRInt32 total = 0;
  for (PRInt32 idx = 0; idx < len; ++idx) {
    if (aText[idx] == kSzlig) {
      ++total;
    }
  }
  return total;
}
// Expand every kSzlig among the first len characters of aText into "SS",
// in place. szCnt is the number of kSzlig characters in that range; the
// caller must provide room for len + szCnt characters.
static void ReplaceGermanSzligToSS(PRUnichar* aText, PRInt32 len, PRInt32 szCnt)
{
  // Work from the back so that nothing is overwritten before it has been
  // read. Once the read and write positions meet, every kSzlig has been
  // expanded and the remaining prefix is already where it belongs.
  PRInt32 readIdx = len - 1;
  PRInt32 writeIdx = readIdx + szCnt;
  while (readIdx != writeIdx && readIdx >= 0) {
    const PRUnichar ch = aText[readIdx--];
    if (ch == kSzlig) {
      aText[writeIdx--] = PRUnichar('S');
      aText[writeIdx--] = PRUnichar('S');
    } else {
      aText[writeIdx--] = ch;
    }
  }
}
// Apply the language-specific character substitutions, in place, to the
// first aLen characters of aText. Only the Japanese transform type does
// anything: each BACKSLASH becomes a YEN SIGN, which marks the text as
// multibyte and sets *aWasTransformed.
void
nsTextTransformer::LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen,
                                             PRBool* aWasTransformed)
{
  /* we once do transformation for Korean, but later decide to remove it */
  /* see bug 88050 for more information */
  if (mLanguageSpecificTransformType !=
      eLanguageSpecificTransformType_Japanese) {
    return;
  }

  for (PRInt32 idx = 0; idx < aLen; ++idx) {
    PRUnichar& ch = aText[idx];
    if (ch == 0x5C) { // BACKSLASH
      ch = 0xA5; // YEN SIGN
      SetHasMultibyte(PR_TRUE);
      *aWasTransformed = PR_TRUE;
    }
#if 0
    /*
     * We considered doing this, but since some systems may not have fonts
     * with this OVERLINE glyph, we decided not to do this.
     */
    else if (ch == 0x7E) { // TILDE
      ch = 0x203E; // OVERLINE
      SetHasMultibyte(PR_TRUE);
      *aWasTransformed = PR_TRUE;
    }
#endif
  }
}
// Scans forward from mOffset and returns the next "word" of the text
// fragment (a run of text, or a piece of whitespace), copied into the
// transform buffer with the active text-transform applied.
//
//   aInWord             - negated and handed to the title-case converter
//                         when the text-transform is "capitalize"
//   aWordLenResult      - OUT: length of the returned word.  When IBMBIDI
//                         is defined it is also read on entry: a value > 0
//                         and smaller than the fragment length caps how far
//                         the scan may go.
//   aContentLenResult   - OUT: number of fragment characters consumed
//   aIsWhiteSpaceResult - OUT: PR_TRUE when the returned word is whitespace
//   aWasTransformed     - OUT: reset to PR_FALSE on entry; set to PR_TRUE by
//                         the scanners/shapers, when a text-transform is
//                         active, or when word and content lengths differ
//   aResetTransformBuf  - when true the write position in the transform
//                         buffer is rewound to 0 before scanning
//   aForLineBreak       - when false the scan mode is forced to eNormal and
//                         the word-break ASCII scanner is used
//   aIsKeyboardSelect   - forwarded to the word-break ASCII scanner
//
// Returns a pointer into the transform buffer at the position the buffer
// had on entry (after the optional reset), or nsnull when no
// non-discarded character was found.  When TransformedTextIsAscii() is
// true the returned pointer really addresses 1-byte characters.
PRUnichar*
nsTextTransformer::GetNextWord(PRBool aInWord,
                               PRInt32* aWordLenResult,
                               PRInt32* aContentLenResult,
                               PRBool* aIsWhiteSpaceResult,
                               PRBool* aWasTransformed,
                               PRBool aResetTransformBuf,
                               PRBool aForLineBreak,
                               PRBool aIsKeyboardSelect)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
#ifdef IBMBIDI
  if (*aWordLenResult > 0 && *aWordLenResult < fragLen) {
    fragLen = *aWordLenResult;
  }
#endif
  PRInt32 offset = mOffset;
  PRInt32 wordLen = 0;
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;
  // This is an index into the transform buffer, so it must be an integer
  // type rather than PRBool.
  PRInt32 prevBufferPos;
  PRBool skippedWhitespace = PR_FALSE;

  // Initialize OUT parameter
  *aWasTransformed = PR_FALSE;

  // See if we should reset the current buffer position back to the
  // beginning of the buffer
  if (aResetTransformBuf) {
    mBufferPos = 0;
    SetTransformedTextIsAscii(LeaveAsAscii());
  }
  prevBufferPos = mBufferPos;

  // Fix word breaking problem w/ PREFORMAT and PREWRAP
  // for word breaking, we should really go to the normal code
  if ((!aForLineBreak) && (eNormal != mMode))
    mMode = eNormal;

  while (offset < fragLen) {
    PRUnichar firstChar = frag->CharAt(offset);

    // Eat up any discarded characters before dispatching
    if (IS_DISCARDED(firstChar)) {
      offset++;
      continue;
    }

    switch (mMode) {
      default:
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_F();

          // if this is just a '\n', and characters before and after it are
          // CJK chars, we will skip this one.
          if (firstChar == '\n' &&
              offset - mOffset == 1 &&
              mOffset > 0 &&
              offset < fragLen)
          {
            PRUnichar lastChar = frag->CharAt(mOffset - 1);
            PRUnichar nextChar = frag->CharAt(offset);
            if (IS_CJ_CHAR(lastChar) && IS_CJ_CHAR(nextChar)) {
              skippedWhitespace = PR_TRUE;
              // Undo the buffer advance and move past the newline; the
              // skipped character is added back to the content length below.
              --mBufferPos;
              mOffset = offset;
              continue;
            }
          }
          if (firstChar != ' ') {
            *aWasTransformed = PR_TRUE;
          }
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (CH_NBSP == firstChar && !aForLineBreak) {
          wordLen = 1;
          isWhitespace = PR_TRUE;
          *aWasTransformed = PR_TRUE;

          // Make sure we have enough room in the transform buffer
          if (mBufferPos >= mTransformBuf.mBufferLen) {
            mTransformBuf.GrowBy(128);
          }

          offset++;
          if (TransformedTextIsAscii()) {
            ((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' ';
          } else {
            mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
          }
        }
        else if (frag->Is2b()) {
#ifdef IBMBIDI
          wordLen = *aWordLenResult;
#endif
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen,
                                                        aWasTransformed,
                                                        aIsKeyboardSelect);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreformatted:
        if (('\n' == firstChar) || ('\t' == firstChar)) {
          // Make sure we have enough room in the transform buffer (same
          // guard as the NBSP branch above).
          if (mBufferPos >= mTransformBuf.mBufferLen) {
            mTransformBuf.GrowBy(128);
          }
          mTransformBuf.mBuffer[mBufferPos++] = firstChar;
          offset++;
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
          offset = ScanPreData_F(&wordLen, aWasTransformed);
        }
        else {
          offset = ScanPreAsciiData_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreWrap:
        if (XP_IS_SPACE(firstChar)) {
          if (('\n' == firstChar) || ('\t' == firstChar)) {
            // Make sure we have enough room in the transform buffer (same
            // guard as the NBSP branch above).
            if (mBufferPos >= mTransformBuf.mBufferLen) {
              mTransformBuf.GrowBy(128);
            }
            mTransformBuf.mBuffer[mBufferPos++] = firstChar;
            offset++;
            wordLen = 1;
          }
          else {
            offset = ScanPreWrapWhiteSpace_F(&wordLen);
          }
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
#ifdef IBMBIDI
          wordLen = *aWordLenResult;
#endif
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed,
                                                        aIsKeyboardSelect);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;
    }

    if (TransformedTextIsAscii()) {
      // The buffer holds 1-byte characters; prevBufferPos is a byte index.
      unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos;

      if (!isWhitespace) {
        switch (mTextTransform) {
          case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
            *wordPtr = toupper(*wordPtr);
            break;
          case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
            AsciiToLowerCase(wordPtr, wordLen);
            break;
          case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
            AsciiToUpperCase(wordPtr, wordLen);
            break;
        }
        NS_ASSERTION(mLanguageSpecificTransformType ==
                     eLanguageSpecificTransformType_None,
                     "should not be ASCII for language specific transforms");
      }
      result = (PRUnichar*)wordPtr;
    } else {
      result = &mTransformBuf.mBuffer[prevBufferPos];

      if (!isWhitespace) {
        switch (mTextTransform) {
          case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
            if (NS_SUCCEEDED(EnsureCaseConv()))
              gCaseConv->ToTitle(result, result, wordLen, !aInWord);
            // if the first character is szlig
            if (kSzlig == *result)
            {
              // One extra slot is needed because the szlig becomes "SS".
              if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) {
                mTransformBuf.GrowBy(128);
                result = &mTransformBuf.mBuffer[prevBufferPos];
              }
              // Shift everything after the first character up by one.
              PRUnichar* src = result + wordLen;
              while (src > result)
              {
                *(src + 1) = *src;
                src--;
              }
              result[0] = PRUnichar('S');
              result[1] = PRUnichar('S');
              wordLen++;
            }
            break;
          case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
            if (NS_SUCCEEDED(EnsureCaseConv()))
              gCaseConv->ToLower(result, result, wordLen);
            break;
          case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
            {
              if (NS_SUCCEEDED(EnsureCaseConv()))
                gCaseConv->ToUpper(result, result, wordLen);

              // first we search for German Szlig
              PRInt32 szligCnt = CountGermanSzlig(result, wordLen);
              if (szligCnt > 0) {
                // Make sure we have enough room in the transform buffer
                if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen)
                {
                  mTransformBuf.GrowBy(128);
                  result = &mTransformBuf.mBuffer[prevBufferPos];
                }
                ReplaceGermanSzligToSS(result, wordLen, szligCnt);
                wordLen += szligCnt;
              }
            }
            break;
        }
        if (mLanguageSpecificTransformType !=
            eLanguageSpecificTransformType_None) {
          LanguageSpecificTransform(result, wordLen, aWasTransformed);
        }
        if (NeedsArabicShaping()) {
          DoArabicShaping(result, wordLen, aWasTransformed);
        }
        if (NeedsNumericShaping()) {
          DoNumericShaping(result, wordLen, aWasTransformed);
        }
      }
    }

    // Only one word is produced per call.
    break;
  }

  *aIsWhiteSpaceResult = isWhitespace;
  *aWordLenResult = wordLen;
  *aContentLenResult = offset - mOffset;

  // we need to adjust the length if a '\n' has been skip between CJK chars
  *aContentLenResult += (skippedWhitespace ? 1 : 0);

  // If the word length doesn't match the content length then we transformed
  // the text
  if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) ||
      (*aWordLenResult != *aContentLenResult)) {
    *aWasTransformed = PR_TRUE;
    mBufferPos = prevBufferPos + *aWordLenResult;
  }

  mOffset = offset;

  NS_ASSERTION(mBufferPos == prevBufferPos + *aWordLenResult, "internal error");
  return result;
}
//----------------------------------------------------------------------
// Backward scan over a run of collapsible whitespace.
// Contract: wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
//
// Walks backwards from mOffset until a character is found that is
// neither whitespace nor a discarded character, stores a single space in
// the last slot of the transform buffer and returns the index at which
// the walk stopped (-1 when the start of the fragment was reached).
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_B()
{
  PRInt32 pos = mOffset - 1;
  for (; pos >= 0; --pos) {
    const PRUnichar c = mFrag->CharAt(pos);
    // Discarded characters collapse together with the surrounding spaces.
    if (XP_IS_SPACE(c) || IS_DISCARDED(c)) {
      continue;
    }
    break;
  }

  mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
  return pos;
}
// Backward scan over one word of 1-byte text.
// Contract: wordlen==*aWordLen, contentlen=newOffset-currentOffset,
// isWhitespace=f
//
// Characters are copied right-to-left into the tail of the transform
// buffer.  NBSP is treated as a space, so it (like any other whitespace)
// ends the word; discarded characters are skipped.  When
// sWordSelectStopAtPunctuation is set the scan also stops where an
// alphanumeric run meets ASCII punctuation (for keyboard selection a
// leading punctuation run is allowed to continue into the alphanumeric
// run before it).
//
// Returns the index at which the scan stopped; *aWordLen receives the
// number of characters placed in the buffer.
PRInt32
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen, PRBool aIsKeyboardSelect)
{
  const nsTextFragment* const text = mFrag;
  PRInt32 pos = mOffset;
  PRUnichar* writePtr = mTransformBuf.GetBufferEnd();
  PRUnichar* bufHead = mTransformBuf.GetBuffer();

  // High-bit characters are treated as alphanumeric, otherwise we would
  // get stuck on accented letters.  isalnum() cannot be trusted for
  // high-bit characters, so non-ASCII punctuation does not stop the scan;
  // the punctuation we care about is all low-bit.
  const PRUnichar lastCh = text->CharAt(pos - 1);
  PRBool inAlnumRun = isalnum(lastCh) || !IS_ASCII_CHAR(lastCh);

  for (--pos; pos >= 0; --pos) {
    PRUnichar c = text->CharAt(pos);
    if (CH_NBSP == c) {
      c = ' ';
    }

    if (XP_IS_SPACE(c)) {
      break;
    }
    if (IS_DISCARDED(c)) {
      continue;
    }

    if (sWordSelectStopAtPunctuation) {
      const PRBool wordChar = isalnum(c) || !IS_ASCII_CHAR(c);
      if (inAlnumRun) {
        if (!wordChar) {
          // Break on ascii punctuation
          break;
        }
      }
      else if (wordChar) {
        if (!aIsKeyboardSelect) {
          break;
        }
        inAlnumRun = PR_TRUE;
      }
    }

    if (c > MAX_UNIBYTE) {
      SetHasMultibyte(PR_TRUE);
    }

    if (writePtr == bufHead) {
      // Buffer is full: enlarge it and re-derive the write position from
      // the length the buffer had before growing.
      const PRInt32 lenBeforeGrow = mTransformBuf.mBufferLen;
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      writePtr = mTransformBuf.GetBufferEnd() - lenBeforeGrow;
      bufHead = mTransformBuf.GetBuffer();
    }
    *--writePtr = c;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - writePtr;
  return pos;
}
// Backward scan over one word of 2-byte text, using the line breaker
// (aForLineBreak true) or the word breaker (aForLineBreak false).
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// The character just before mOffset is always stored in the last slot of
// the transform buffer; any further characters of the word are copied in
// front of it.  When IBMBIDI is defined, a positive *aWordLen on entry is
// used as the lowest offset the scan may reach, and bidi control
// characters directly before mOffset are skipped first.
// Returns the new offset; *aWordLen receives the number of characters in
// the buffer.
PRInt32
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
PRInt32* aWordLen)
{
const nsTextFragment* frag = mFrag;
const PRUnichar* cp0 = frag->Get2b();
PRInt32 offset = mOffset - 1;
PRUnichar firstChar = frag->CharAt(offset);
#ifdef IBMBIDI
// Step back over bidi control characters, but never below the limit.
PRInt32 limit = (*aWordLen > 0) ? *aWordLen : 0;
while (offset > limit && IS_BIDI_CONTROL(firstChar) ) {
firstChar = frag->CharAt(--offset);
}
#endif
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
PRInt32 numChars = 1;
// NOTE: the opening "if" below is selected by the preprocessor; both
// variants share the same body and the trailing "else offset--;".
#ifdef IBMBIDI
if (offset > limit) {
#else
if (offset > 0) {
#endif
const PRUnichar* cp = cp0 + offset;
PRBool breakBetween = PR_FALSE;
// Ask the breaker whether a break falls next to firstChar; the answer
// lands in breakBetween.
if (aForLineBreak) {
mLineBreaker->BreakInBetween(cp0, offset + 1,
mTransformBuf.GetBufferEnd()-1, 1,
&breakBetween);
}
else {
mWordBreaker->BreakInBetween(cp0, offset + 1,
mTransformBuf.GetBufferEnd()-1, 1,
&breakBetween);
}
// NOTE(review): when breakBetween is true nothing in this branch changes
// offset, so the function returns the index of firstChar itself rather
// than the index before it -- confirm this is what callers expect.
if (!breakBetween) {
// Find next position
PRBool tryPrevFrag;
PRUint32 prev;
if (aForLineBreak) {
mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
}
else {
mWordBreaker->PrevWord(cp0, offset, offset, &prev, &tryPrevFrag);
}
// Character count from prev up to and including firstChar.
numChars = (PRInt32) ((PRUint32) offset - prev) + 1;
// Grow buffer before copying
nsresult rv = mTransformBuf.GrowTo(numChars);
if (NS_FAILED(rv)) {
// Could not grow: copy only as much as the current buffer holds.
numChars = mTransformBuf.GetBufferLength();
}
// 1. convert nbsp into space
// 2. check mHasMultibyte flag
// 3. copy buffer
// bp starts on the slot that already holds firstChar, so the loop
// copies the numChars - 1 characters that precede it.
PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
const PRUnichar* end = cp - numChars + 1;
while (cp > end) {
PRUnichar ch = *--cp;
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch)) {
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
*--bp = ch;
}
// Recompute offset and numChars in case we stripped something
offset = offset - numChars;
numChars = mTransformBuf.GetBufferEnd() - bp;
}
}
else
// firstChar sits at the lowest allowed offset: the word is that single
// character.
offset--;
*aWordLen = numChars;
return offset;
}
// Backward scan over a run of whitespace in pre-wrap mode.
// Contract: wordlen==*aWordLen, contentlen=newOffset-currentOffset,
// isWhitespace=t
//
// Every whitespace character other than tab and newline contributes one
// space to the tail of the transform buffer.  Discarded characters are
// skipped; a tab, a newline or any other character ends the run.
// Returns the index at which the scan stopped; *aWordLen receives the
// number of spaces written.
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
{
  PRInt32 pos = mOffset;
  PRUnichar* out = mTransformBuf.GetBufferEnd();
  PRUnichar* bufHead = mTransformBuf.GetBuffer();

  for (--pos; pos >= 0; --pos) {
    const PRUnichar c = mFrag->CharAt(pos);
    const PRBool plainSpace = XP_IS_SPACE(c) && (c != '\t') && (c != '\n');

    if (!plainSpace) {
      // Keep looping if this is a discarded character
      if (IS_DISCARDED(c)) {
        continue;
      }
      break;
    }

    if (out == bufHead) {
      // Buffer is full: enlarge it and re-derive the write position from
      // the length the buffer had before growing.
      const PRInt32 lenBeforeGrow = mTransformBuf.mBufferLen;
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBufferEnd() - lenBeforeGrow;
      bufHead = mTransformBuf.GetBuffer();
    }
    *--out = ' ';
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - out;
  return pos;
}
// Backward scan over preformatted text.
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// Copies characters right-to-left into the tail of the transform buffer
// until a tab or newline is met or the start of the fragment is reached.
// NBSP is stored as a plain space and discarded characters are skipped.
// Returns the index at which the scan stopped (the caller adds one to get
// the start of the consumed content); *aWordLen receives the number of
// characters placed in the buffer.
PRInt32
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 offset = mOffset;
  PRUnichar* bp = mTransformBuf.GetBufferEnd();
  PRUnichar* startbp = mTransformBuf.GetBuffer();

  while (--offset >= 0) {
    PRUnichar ch = frag->CharAt(offset);
    if ((ch == '\t') || (ch == '\n')) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

    if (bp == startbp) {
      PRInt32 oldLength = mTransformBuf.mBufferLen;
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input.
        // The character at |offset| has not been stored, so |offset| is
        // already the correct stop index.  Moving it forward here (as the
        // code used to) would make the reported content one character
        // shorter than what is in the buffer; the other backward scanners
        // do not adjust it either.
        break;
      }
      bp = mTransformBuf.GetBufferEnd() - oldLength;
      startbp = mTransformBuf.GetBuffer();
    }
    *--bp = ch;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - bp;
  return offset;
}
//----------------------------------------
// Scans backward from mOffset and returns the previous "word" of the text
// fragment (a run of text, or a piece of whitespace).  The word is stored
// right-aligned at the end of the transform buffer.
//
//   aInWord             - negated and handed to the title-case converter
//                         when the text-transform is "capitalize"
//   aWordLenResult      - OUT: length of the returned word.  When IBMBIDI
//                         is defined it is also read on entry: a positive
//                         value is the lowest offset the scan may reach.
//   aContentLenResult   - OUT: mOffset before the call minus mOffset after
//   aIsWhiteSpaceResult - OUT: PR_TRUE when the returned word is whitespace
//   aForLineBreak       - when false the scan mode is forced to eNormal;
//                         also forwarded to the unicode scanner
//   aIsKeyboardSelect   - forwarded to the ASCII scanner
//
// Returns a pointer to the word inside the transform buffer, or nsnull
// when no non-discarded character was found.
PRUnichar*
nsTextTransformer::GetPrevWord(PRBool aInWord,
PRInt32* aWordLenResult,
PRInt32* aContentLenResult,
PRBool* aIsWhiteSpaceResult,
PRBool aForLineBreak,
PRBool aIsKeyboardSelect)
{
const nsTextFragment* frag = mFrag;
PRInt32 offset = mOffset;
PRInt32 wordLen = 0;
PRBool isWhitespace = PR_FALSE;
PRUnichar* result = nsnull;
// Fix word breaking problem w/ PREFORMAT and PREWRAP
// for word breaking, we should really go to the normal code
if((! aForLineBreak) && (eNormal != mMode))
mMode = eNormal;
// NOTE: the loop header below is selected by the preprocessor; both
// variants share the same body.
#ifdef IBMBIDI
PRInt32 limit = (*aWordLenResult > 0) ? *aWordLenResult : 0;
while (--offset >= limit) {
#else
while (--offset >= 0) {
#endif
PRUnichar firstChar = frag->CharAt(offset);
// Eat up any discarded characters before dispatching
if (IS_DISCARDED(firstChar)) {
continue;
}
// firstChar only selects which scanner runs.  The scanners take their
// own starting point from mOffset, not from the local offset.
switch (mMode) {
default:
case eNormal:
if (XP_IS_SPACE(firstChar)) {
offset = ScanNormalWhiteSpace_B();
wordLen = 1;
isWhitespace = PR_TRUE;
}
else if (CH_NBSP == firstChar && !aForLineBreak) {
// For word breaking an NBSP is reported as a one-character space.
wordLen = 1;
isWhitespace = PR_TRUE;
mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
offset--;
} else if (frag->Is2b()) {
#ifdef IBMBIDI
wordLen = *aWordLenResult;
#endif
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
}
else {
offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect);
}
break;
case ePreformatted:
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
offset--; // make sure we overshoot
wordLen = 1;
isWhitespace = PR_TRUE;
}
else {
offset = ScanPreData_B(&wordLen);
}
break;
case ePreWrap:
if (XP_IS_SPACE(firstChar)) {
if (('\n' == firstChar) || ('\t' == firstChar)) {
mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
offset--; // make sure we overshoot
wordLen = 1;
}
else {
offset = ScanPreWrapWhiteSpace_B(&wordLen);
}
isWhitespace = PR_TRUE;
}
else if (frag->Is2b()) {
#ifdef IBMBIDI
wordLen = *aWordLenResult;
#endif
offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
}
else {
offset = ScanNormalAsciiText_B(&wordLen, aIsKeyboardSelect);
}
break;
}
// Backwards scanning routines *always* overshoot by one for the
// returned offset value.
offset = offset + 1;
// The word occupies the last wordLen slots of the transform buffer.
result = mTransformBuf.GetBufferEnd() - wordLen;
if (!isWhitespace) {
switch (mTextTransform) {
case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
if(NS_SUCCEEDED(EnsureCaseConv()))
gCaseConv->ToTitle(result, result, wordLen, !aInWord);
break;
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
if(NS_SUCCEEDED(EnsureCaseConv()))
gCaseConv->ToLower(result, result, wordLen);
break;
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
if(NS_SUCCEEDED(EnsureCaseConv()))
gCaseConv->ToUpper(result, result, wordLen);
break;
}
}
// Only one word is produced per call.
break;
}
// NOTE(review): when the loop ends without finding a word, offset is one
// below the lower bound (-1 without a limit) and the "+ 1" correction
// above has not been applied, so mOffset is set to that value and the
// content length is one larger than the characters skipped -- confirm
// callers tolerate this.
*aWordLenResult = wordLen;
*aContentLenResult = mOffset - offset;
*aIsWhiteSpaceResult = isWhitespace;
mOffset = offset;
return result;
}
// Runs Arabic shaping over aText (aTextLength characters) and then copies
// the shaped text back into aText with zero-width joiner controls
// removed.  aTextLength is updated to the resulting length and
// *aWasTransformed is set to PR_TRUE.  Does nothing when aTextLength is
// not positive.
void
nsTextTransformer::DoArabicShaping(PRUnichar* aText,
                                   PRInt32& aTextLength,
                                   PRBool* aWasTransformed)
{
  if (aTextLength > 0) {
    // The same flag is passed for both trailing arguments of
    // ArabicShaping(): the inverse of the pres context's visual mode.
    const PRBool notVisual = !mPresContext->IsVisualMode();

    // Scratch string that receives the shaped output.
    nsAutoString shaped;
    shaped.SetLength(aTextLength);
    PRUnichar* shapedChars = shaped.BeginWriting();

    PRInt32 shapedLen;
    ArabicShaping(aText, shaped.Length(), shapedChars, (PRUint32 *)&shapedLen,
                  notVisual, notVisual);

    aTextLength = shapedLen;
    *aWasTransformed = PR_TRUE;

    StripZeroWidthJoinControls(shapedChars, aText, aTextLength, aWasTransformed);
  }
}
// Converts the digits in aText according to the numeral option of the
// pres context's bidi settings, by calling HandleNumbers() with either
// IBMBIDI_NUMERAL_HINDI or IBMBIDI_NUMERAL_ARABIC.  For the "regular" and
// "hindi context" options the choice also depends on mCharType (and, for
// hindi context, on the text direction and the first character).
// Nothing is done for the nominal option, for unknown options, or when
// aTextLength is not positive.  aWasTransformed is not touched here.
void
nsTextTransformer::DoNumericShaping(PRUnichar* aText,
                                    PRInt32& aTextLength,
                                    PRBool* aWasTransformed)
{
  if (aTextLength <= 0)
    return;

  const PRUint32 bidiOptions = mPresContext->GetBidi();
  const PRUint32 numeralOption = GET_BIDI_OPTION_NUMERAL(bidiOptions);

  if (IBMBIDI_NUMERAL_HINDI == numeralOption) {
    HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
  }
  else if (IBMBIDI_NUMERAL_ARABIC == numeralOption) {
    HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
  }
  else if (IBMBIDI_NUMERAL_REGULAR == numeralOption) {
    // The character type of the run decides the digit shapes.
    if (eCharType_EuropeanNumber == mCharType) {
      HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
    }
    else if (eCharType_ArabicNumber == mCharType) {
      HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
    }
  }
  else if (IBMBIDI_NUMERAL_HINDICONTEXT == numeralOption) {
    const PRBool rtlArabicDigit =
      (GET_BIDI_OPTION_DIRECTION(bidiOptions) == IBMBIDI_TEXTDIRECTION_RTL) &&
      (IS_ARABIC_DIGIT (aText[0]));

    if (rtlArabicDigit || (eCharType_ArabicNumber == mCharType)) {
      HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
    }
    else if (eCharType_EuropeanNumber == mCharType) {
      HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
    }
  }
  // IBMBIDI_NUMERAL_NOMINAL and any other value: leave the text alone.
}
// Copies aSource to aTarget, leaving out every ZERO WIDTH NON-JOINER and
// ZERO WIDTH JOINER character.
//
//   aSource         - text to read; aTextLength characters are examined
//   aTarget         - destination; receives the kept characters
//   aTextLength     - IN: number of characters in aSource
//                     OUT: number of characters written to aTarget
//   aWasTransformed - set to PR_TRUE when at least one character was
//                     removed; otherwise left untouched
//
// Exactly aTextLength source characters are read.  (The previous loop
// counted output characters while skipping joiners without a bound, so it
// read one character past the end of aSource for every joiner removed.)
void
nsTextTransformer::StripZeroWidthJoinControls(PRUnichar* aSource,
                                              PRUnichar* aTarget,
                                              PRInt32& aTextLength,
                                              PRBool* aWasTransformed)
{
  // Nothing to do; keep aTextLength as the caller passed it.
  if (aTextLength <= 0)
    return;

  PRInt32 kept = 0;
  for (PRInt32 i = 0; i < aTextLength; ++i) {
    const PRUnichar ch = aSource[i];
    if (ch == CH_ZWNJ || ch == CH_ZWJ) {
      *aWasTransformed = PR_TRUE;
      continue;
    }
    aTarget[kept++] = ch;
  }
  aTextLength = kept;
}
//----------------------------------------------------------------------
// Self test logic for this class. This will (hopefully) make sure
// that the forward and backward word iterator methods continue to
// function as people change things...
#ifdef DEBUG
// Expected results of one self test in one whitespace mode.
struct SelfTestSection {
// Number of entries in |data|.
int length;
// Expected word lengths, in forward order.
int* data;
};
// Number of whitespace modes every self test is run in.
#define NUM_MODES 3
// One self test: a zero-terminated input text plus the expected word
// lengths for each whitespace mode.
struct SelfTestData {
const PRUnichar* text;
// Indexed in step with preModeValue[].
SelfTestSection modes[NUM_MODES];
};
// Whitespace style value handed to Init2() for each mode index.
static PRUint8 preModeValue[NUM_MODES] = {
NS_STYLE_WHITESPACE_NORMAL,
NS_STYLE_WHITESPACE_PRE,
NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
};
// Self-test inputs.  Each testN has a zero-terminated text and three
// arrays of expected word lengths: testNResults (normal whitespace),
// testNPreResults (pre) and testNPreWrapResults (pre-wrap).

// Test 1: plain ASCII words separated by spaces and a tab.
static PRUnichar test1text[] = {
'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test1PreResults[] = { 9, 1, 12 };
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
// Test 2: same shape as test 1 but with high-bit (0x80-0xFF) characters
// and a trailing space.
static PRUnichar test2text[] = {
0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
};
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static int test2PreResults[] = { 9, 1, 13 };
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
// Test 3: contains a character above 255 (0x0152), so SelfTest labels it
// "unicode" rather than "ascii".
static PRUnichar test3text[] = {
0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
};
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
static int test3PreResults[] = { 7, 1, 3, };
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };
// Test 4: test 1 with CH_SHY characters inserted inside a word and
// between two spaces.
static PRUnichar test4text[] = {
'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test4PreResults[] = { 10, 1, 12 };
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };
// Test 5: text consisting of a single CH_SHY.  A first expected value of
// 0 is how SelfTest recognises a test that expects no words.
static PRUnichar test5text[] = {
CH_SHY, 0
};
static int test5Results[] = { 0 };
static int test5PreResults[] = { 0 };
static int test5PreWrapResults[] = { 0 };
// Test 6 (mixed Japanese and ASCII text) is compiled out.
#if 0
static PRUnichar test6text[] = {
0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!',
'\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
0x30da, 0x30fc, 0x30b8, 0x3002, 0
};
static int test6Results[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
static int test6PreResults[] = { 20, 1, 13 };
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
#endif
// Table of all self tests.  Each entry pairs a text with its expected
// results for the three modes, in the order normal, pre, pre-wrap; the
// element count of every results array is computed with sizeof.
static SelfTestData tests[] = {
{ test1text,
{ { sizeof(test1Results)/sizeof(int), test1Results, },
{ sizeof(test1PreResults)/sizeof(int), test1PreResults, },
{ sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
},
{ test2text,
{ { sizeof(test2Results)/sizeof(int), test2Results, },
{ sizeof(test2PreResults)/sizeof(int), test2PreResults, },
{ sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
},
{ test3text,
{ { sizeof(test3Results)/sizeof(int), test3Results, },
{ sizeof(test3PreResults)/sizeof(int), test3PreResults, },
{ sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
},
{ test4text,
{ { sizeof(test4Results)/sizeof(int), test4Results, },
{ sizeof(test4PreResults)/sizeof(int), test4PreResults, },
{ sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
},
{ test5text,
{ { sizeof(test5Results)/sizeof(int), test5Results, },
{ sizeof(test5PreResults)/sizeof(int), test5PreResults, },
{ sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
},
// Compiled out together with the test 6 data.
#if 0
{ test6text,
{ { sizeof(test6Results)/sizeof(int), test6Results, },
{ sizeof(test6PreResults)/sizeof(int), test6PreResults, },
{ sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
},
#endif
};
// Number of entries in tests[].
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
// Self test entry point.
// Debug-only self test for the forward and backward word iterators.
//
// Every entry of tests[] is run in each of the NUM_MODES whitespace
// modes: first forwards with GetNextWord(), then backwards with
// GetPrevWord(), comparing each returned word length with the expected
// table.  A table whose first entry is 0 marks a test that expects no
// words at all.  Failures are reported on stderr and, once everything has
// run, trigger NS_ABORT().  Setting the environment variable
// GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST prints every word to stdout.
//
// The three arguments are only used to construct the transformer under
// test.
void
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
                            nsIWordBreaker* aWordBreaker,
                            nsPresContext* aPresContext)
{
  PRBool gNoisy = PR_FALSE;
  if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
    gNoisy = PR_TRUE;
  }

  PRBool error = PR_FALSE;
  PRInt32 testNum = 0;
  SelfTestData* st = tests;
  SelfTestData* last = st + NUM_TESTS;
  for (; st < last; st++) {
    PRUnichar* bp;
    PRInt32 wordLen, contentLen;
    PRBool ws, transformed;

    // The test is labelled "ascii" unless the text has a char above 255.
    PRBool isAsciiTest = PR_TRUE;
    const PRUnichar* cp = st->text;
    while (*cp) {
      if (*cp > 255) {
        isAsciiTest = PR_FALSE;
        break;
      }
      cp++;
    }

    nsTextFragment frag(st->text);
    nsTextTransformer tx(aLineBreaker, aWordBreaker, aPresContext);

    for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
      int* const expectedBegin = st->modes[preMode].data;
      int resultsLen = st->modes[preMode].length;
      int* const expectedEnd = expectedBegin + resultsLen;

      // Do forwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
        printf("'\n");
      }
      tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);

      int* expectedResults = expectedBegin;
#ifdef IBMBIDI
      wordLen = -1;
#endif
      while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
        // More words than expected is a failure; never read past the
        // end of the expectation table.
        const PRBool haveExpected = (expectedResults < expectedEnd);
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf("  '");
          fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
          printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, haveExpected ? *expectedResults : -1,
                 contentLen, tx.mOffset);
        }
        if (!haveExpected || *expectedResults != wordLen) {
          error = PR_TRUE;
          break;
        }
        expectedResults++;
#ifdef IBMBIDI
        wordLen = -1;
#endif
      }
      if (expectedResults != expectedEnd) {
        if (expectedBegin[0] != 0) {
          error = PR_TRUE;
        }
      }

      // Do backwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(NS_ConvertUCS2toUTF8(uc2).get(), stdout);
        printf("'\n");
      }
      // Use the same whitespace mode as the forwards pass.  Init2() only
      // changes the mode for the pre and pre-wrap values, so this selects
      // the same mode the backwards pass was already running in, without
      // depending on state left over from the forwards pass.
      tx.Init2(&frag, frag.GetLength(), preModeValue[preMode],
               NS_STYLE_TEXT_TRANSFORM_NONE);

      expectedResults = expectedEnd;
#ifdef IBMBIDI
      wordLen = -1;
#endif
      while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
        // More words than expected is a failure; never step before the
        // start of the expectation table.
        if (expectedResults == expectedBegin) {
          error = PR_TRUE;
          break;
        }
        --expectedResults;
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf("  '");
          fputs(NS_ConvertUCS2toUTF8(tmp).get(), stdout);
          printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, contentLen, tx.mOffset);
        }
        if (*expectedResults != wordLen) {
          error = PR_TRUE;
          break;
        }
#ifdef IBMBIDI
        wordLen = -1;
#endif
      }
      if (expectedResults != expectedBegin) {
        if (expectedBegin[0] != 0) {
          error = PR_TRUE;
        }
      }

      // Note that |error| is sticky: once set, every later run is
      // reported as failed too.  testNum counts (test, mode) runs.
      if (error) {
        fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
      }
      else if (gNoisy) {
        fprintf(stdout, "nsTextTransformer: self test %d succeeded\n", testNum);
      }
      testNum++;
    }
  }
  if (error) {
    NS_ABORT();
  }
}
// Lightweight initialiser used by the self test.
//
//   aFrag           - text fragment to iterate over
//   aStartingOffset - initial scan position; values outside
//                     [0, fragment length] are clamped with a warning
//   aWhiteSpace     - NS_STYLE_WHITESPACE_* value; "pre" selects
//                     ePreformatted and "-moz-pre-wrap" selects ePreWrap.
//                     Any other value leaves the current mode unchanged.
//   aTextTransform  - NS_STYLE_TEXT_TRANSFORM_* value to apply
//
// Always returns NS_OK.
nsresult
nsTextTransformer::Init2(const nsTextFragment* aFrag,
                         PRInt32 aStartingOffset,
                         PRUint8 aWhiteSpace,
                         PRUint8 aTextTransform)
{
  mFrag = aFrag;

  // Sanitize aStartingOffset
  PRInt32 start = aStartingOffset;
  if (start < 0) {
    NS_WARNING("bad starting offset");
    start = 0;
  }
  else {
    const PRInt32 fragLen = mFrag->GetLength();
    if (start > fragLen) {
      NS_WARNING("bad starting offset");
      start = fragLen;
    }
  }
  mOffset = start;

  // Get the frames text style information
  switch (aWhiteSpace) {
    case NS_STYLE_WHITESPACE_PRE:
      mMode = ePreformatted;
      break;
    case NS_STYLE_WHITESPACE_MOZ_PRE_WRAP:
      mMode = ePreWrap;
      break;
    default:
      // Deliberately no change to mMode.
      break;
  }

  mTextTransform = aTextTransform;
  return NS_OK;
}
#endif /* DEBUG */