pjs/layout/generic/nsTextTransformer.cpp

1521 строка
44 KiB
C++
Исходник Обычный вид История

1998-10-20 04:17:17 +04:00
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 *
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is Netscape Communications
 * Corporation. Portions created by Netscape are
 * Copyright (C) 1998 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 */
#include <ctype.h>
#include "nsCOMPtr.h"
1998-10-20 04:17:17 +04:00
#include "nsTextTransformer.h"
#include "nsIContent.h"
#include "nsIFrame.h"
#include "nsIStyleContext.h"
#include "nsITextContent.h"
#include "nsStyleConsts.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
1999-04-28 02:14:36 +04:00
#include "nsHTMLIIDs.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsICaseConversion.h"
#include "prenv.h"
// A new buffer starts out using the object's own mAutoBuffer storage, sized
// NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE; GrowTo() switches to a heap array
// when more room is requested.
nsAutoTextBuffer::nsAutoTextBuffer()
{
  mBuffer = mAutoBuffer;
  mBufferLen = NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE;
}
// Releases the heap array, if GrowTo() ever replaced mAutoBuffer with one.
nsAutoTextBuffer::~nsAutoTextBuffer()
{
  const PRBool ownsHeapArray = (mBuffer != nsnull) && (mBuffer != mAutoBuffer);
  if (ownsHeapArray) {
    delete [] mBuffer;
  }
}
// Enlarges the buffer by at least aAtLeast characters.
//
// The buffer is normally doubled; when doubling would not add aAtLeast
// characters, the new size is the required size plus 100 characters of slack.
// aCopyToHead is forwarded to GrowTo(), which also supplies the result code.
nsresult
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
{
  const PRInt32 required = mBufferLen + aAtLeast;
  PRInt32 target = mBufferLen * 2;
  if (target < required) {
    target = required + 100;
  }
  return GrowTo(target, aCopyToHead);
}
// Makes the buffer hold at least aNewSize characters, preserving the
// existing mBufferLen characters.
//
// aCopyToHead selects where the old contents land in the new array:
//   - true:  at the start (for callers that fill the buffer front to back);
//   - false: at the very end (for callers that fill it back to front and
//            address their data relative to the end of the buffer).
//
// Returns NS_OK (also when no growth was needed) or NS_ERROR_OUT_OF_MEMORY.
nsresult
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
{
  if (aNewSize <= mBufferLen) {
    return NS_OK;
  }

  PRUnichar* newBuffer = new PRUnichar[aNewSize];
  if (!newBuffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // Tail-aligned data must end at the last slot of the new array. The old
  // code used mBufferLen as the start index, which is the tail only when the
  // size exactly doubles.
  const PRInt32 destStart = aCopyToHead ? 0 : (aNewSize - mBufferLen);
  nsCRT::memcpy(&newBuffer[destStart],
                mBuffer, sizeof(PRUnichar) * mBufferLen);

  if (mBuffer != mAutoBuffer) {
    delete [] mBuffer;
  }
  mBuffer = newBuffer;
  mBufferLen = aNewSize;
  return NS_OK;
}
//----------------------------------------------------------------------
// Class ID passed to the service manager when fetching and releasing the
// case-conversion service below.
static NS_DEFINE_IID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
// Shared case-conversion service. Acquired by nsTextTransformer::Initialize()
// and released (and reset to nsnull) by nsTextTransformer::Shutdown().
static nsICaseConversion* gCaseConv = nsnull;
// Fetches the shared case-conversion service into gCaseConv if it has not
// been fetched yet. Returns NS_OK when the service is already cached,
// otherwise the result of the service-manager lookup.
nsresult
nsTextTransformer::Initialize()
{
  if (gCaseConv) {
    return NS_OK;
  }

  nsresult res = nsServiceManager::GetService(kUnicharUtilCID,
                                              NS_GET_IID(nsICaseConversion),
                                              (nsISupports**)&gCaseConv);
  NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
  NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
  return res;
}
// Hands the cached case-conversion service back to the service manager and
// clears the cache. Does nothing when no service is cached.
void
nsTextTransformer::Shutdown()
{
  if (!gCaseConv) {
    return;
  }
  nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
  gCaseConv = nsnull;
}
// For now, we have only a couple of characters to strip out. If we get
// any more, change this to use a bitset to lookup into.
//   CH_SHY - soft hyphen (discretionary hyphen)
// Note: the argument is evaluated twice, so it must be free of side effects.
#define IS_DISCARDED(_ch) \
  (((_ch) == CH_SHY) || ((_ch) == '\r'))

// Characters above this value make the scanners flag the text as containing
// multibyte data (see the SetHasMultibyte() calls).
#define MAX_UNIBYTE 127
MOZ_DECL_CTOR_COUNTER(nsTextTransformer)

// Creates a transformer that uses the given line and/or word breaker.
//
// aLineBreaker / aWordBreaker - breakers used by the Unicode scanners; at
//                               least one must be non-null (asserted).
// aPresContext                - queried for the language specific transform
//                               type; dereferenced unconditionally, so it
//                               must not be null.
//
// In DEBUG builds the first construction also runs SelfTest().
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
                                     nsIWordBreaker* aWordBreaker,
                                     nsIPresContext* aPresContext)
  : mFrag(nsnull),
    mOffset(0),
    mMode(eNormal),
    mLineBreaker(aLineBreaker),
    mWordBreaker(aWordBreaker),
    mBufferPos(0),
    mFlags(0),
    mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE)
{
  MOZ_COUNT_CTOR(nsTextTransformer);

  aPresContext->
    GetLanguageSpecificTransformType(&mLanguageSpecificTransformType);

  NS_ASSERTION(aLineBreaker != nsnull || aWordBreaker != nsnull,
               "invalid creation of nsTextTransformer");

#ifdef DEBUG
  static PRBool firstTime = PR_TRUE;
  if (firstTime) {
    firstTime = PR_FALSE;
    SelfTest(aLineBreaker, aWordBreaker, aPresContext);
  }
#endif
}
// Only the instance counter needs updating here.
nsTextTransformer::~nsTextTransformer()
{
  MOZ_COUNT_DTOR(nsTextTransformer);
}
// Binds the transformer to a piece of text content and reads the style
// settings that control scanning.
//
// aFrame          - frame whose text style (white-space, text-transform)
//                   is consulted.
// aContent        - content node; must implement nsITextContent for
//                   anything to be initialized.
// aStartingOffset - offset into the text fragment where scanning starts;
//                   clamped to [0, fragment length].
// aLeaveAsAscii   - request to keep the transformed text as 1-byte
//                   characters when possible.
//
// Returns the result of querying aContent for nsITextContent.
nsresult
nsTextTransformer::Init(nsIFrame* aFrame,
                        nsIContent* aContent,
                        PRInt32 aStartingOffset,
                        PRBool aLeaveAsAscii)
{
  // Get the contents text content
  nsresult rv;
  nsCOMPtr<nsITextContent> tc = do_QueryInterface(aContent, &rv);
  if (tc.get()) {
    tc->GetText(&mFrag);

    // Sanitize aStartingOffset. The warnings are plain statements and the
    // clamping uses explicit comparisons, so the result does not depend on
    // whether NS_WARN_IF_FALSE yields a value in this build.
    NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset");
    NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
                     "bad starting offset");
    if (aStartingOffset < 0) {
      aStartingOffset = 0;
    }
    else if (aStartingOffset > mFrag->GetLength()) {
      aStartingOffset = mFrag->GetLength();
    }
    mOffset = aStartingOffset;

    // Get the frames text style information
    const nsStyleText* styleText;
    aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) styleText);
    if (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) {
      mMode = ePreformatted;
    }
    else if (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace) {
      mMode = ePreWrap;
    }
    mTextTransform = styleText->mTextTransform;

    if (aLeaveAsAscii) { // See if the text fragment is 1-byte text
      SetLeaveAsAscii(PR_TRUE);
      // XXX Currently we only leave it as ascii for normal text and not for preformatted
      // or preformatted wrapped text or language specific transforms
      if (mFrag->Is2b() || (eNormal != mMode) ||
          (mLanguageSpecificTransformType !=
           eLanguageSpecificTransformType_None))
        // We don't step down from Unicode to ascii
        SetLeaveAsAscii(PR_FALSE);
    }
    else
      SetLeaveAsAscii(PR_FALSE);
  }
  return rv;
}
//----------------------------------------------------------------------
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
//
// Scans forward from mOffset over a run of whitespace (discarded characters
// inside the run are absorbed into it) and appends a single ' ' to the
// transform buffer, as a byte or a PRUnichar depending on
// TransformedTextIsAscii(). Returns the offset of the first character after
// the run.
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_F()
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
  PRInt32 offset = mOffset;

  for (; offset < fragLen; offset++) {
    PRUnichar ch = frag->CharAt(offset);
    if (!XP_IS_SPACE(ch)) {
      // If character is not discardable then stop looping, otherwise
      // let the discarded character collapse with the other spaces.
      if (!IS_DISCARDED(ch)) {
        break;
      }
    }
  }

  // Make sure we have enough room in the transform buffer
  // NOTE(review): the GrowBy() result is not checked here; a failed growth
  // would let the store below run past the buffer.
  if (mBufferPos >= mTransformBuf.mBufferLen) {
    mTransformBuf.GrowBy(128);
  }

  if (TransformedTextIsAscii()) {
    unsigned char* bp = (unsigned char*)mTransformBuf.mBuffer;
    bp[mBufferPos++] = ' ';
  } else {
    mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
  }
  return offset;
}
void
nsTextTransformer::ConvertTransformedTextToUnicode()
{
// Go backwards over the characters and convert them.
PRInt32 lastChar = mBufferPos - 1;
unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar;
PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar;
NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos,
"transform buffer is too small");
for (PRInt32 count = mBufferPos; count > 0; count--) {
*cp2-- = PRUnichar(*cp1--);
}
}
1998-10-20 04:17:17 +04:00
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
PRBool* aWasTransformed)
1998-10-20 04:17:17 +04:00
{
const nsTextFragment* frag = mFrag;
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRInt32 prevBufferPos = mBufferPos;
const unsigned char* cp = (const unsigned char*)frag->Get1b() + offset;
union {
unsigned char* bp1;
PRUnichar* bp2;
};
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
for (; offset < fragLen; offset++) {
unsigned char ch = *cp++;
if (XP_IS_SPACE(ch)) {
break;
}
if (CH_NBSP == ch) {
ch = ' ';
*aWasTransformed = PR_TRUE;
}
else if (IS_DISCARDED(ch)) {
// Strip discarded characters from the transformed output
continue;
}
if (ch > MAX_UNIBYTE) {
// The text has a multibyte character so we can no longer leave the
// text as ascii text
SetHasMultibyte(PR_TRUE);
if (TransformedTextIsAscii()) {
SetTransformedTextIsAscii(PR_FALSE);
*aWasTransformed = PR_TRUE;
// Transform any existing ascii text to Unicode
if (mBufferPos > 0) {
ConvertTransformedTextToUnicode();
bp2 = mTransformBuf.GetBuffer() + mBufferPos;
}
}
}
if (mBufferPos >= mTransformBuf.mBufferLen) {
nsresult rv = mTransformBuf.GrowBy(128);
if (NS_FAILED(rv)) {
// If we run out of space then just truncate the text
break;
}
bp2 = mTransformBuf.GetBuffer();
if (TransformedTextIsAscii()) {
bp1 += mBufferPos;
} else {
bp2 += mBufferPos;
}
}
if (TransformedTextIsAscii()) {
*bp1++ = ch;
} else {
*bp2++ = PRUnichar(ch);
}
mBufferPos++;
1998-10-20 04:17:17 +04:00
}
*aWordLen = mBufferPos - prevBufferPos;
return offset;
}
// Word-break variant of the forward scan over 1-byte fragment text.
//
// Behaves like the line-break scanner except for NBSP: an NBSP at the very
// start (offset == mOffset) is emitted as a single ' ' and ends the scan
// right after it, while an NBSP met later ends the scan before it. Either
// way *aWasTransformed is set when an NBSP is seen.
//
// aWordLen        - OUT: number of characters appended to the transform
//                   buffer.
// aWasTransformed - set to PR_TRUE when the output differs from the input;
//                   never cleared here.
// Returns the offset at which scanning stopped.
PRInt32
nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen,
                                                      PRBool* aWasTransformed)
{
  const nsTextFragment* frag = mFrag;
  const PRInt32 fragLen = frag->GetLength();
  const PRInt32 startBufferPos = mBufferPos;
  const unsigned char* text = (const unsigned char*)frag->Get1b();
  PRBool stopAfterStore = PR_FALSE;
  PRInt32 offset = mOffset;

  for (; offset < fragLen && !stopAfterStore; offset++) {
    unsigned char ch = text[offset];

    if (CH_NBSP == ch) {
      *aWasTransformed = PR_TRUE;
      if (offset != mOffset) {
        // NBSP in the middle: the word ends just before it.
        break;
      }
      // Leading NBSP: emit it as a space, then stop.
      ch = ' ';
      stopAfterStore = PR_TRUE;
    }
    else if (XP_IS_SPACE(ch)) {
      break;
    }
    else if (IS_DISCARDED(ch)) {
      // Discarded characters never reach the transformed output.
      continue;
    }

    if (ch > MAX_UNIBYTE) {
      // A character outside 7-bit ASCII: the output can no longer stay
      // as 1-byte text.
      SetHasMultibyte(PR_TRUE);
      if (TransformedTextIsAscii()) {
        SetTransformedTextIsAscii(PR_FALSE);
        *aWasTransformed = PR_TRUE;
        // Widen whatever was already stored as bytes.
        if (mBufferPos > 0) {
          ConvertTransformedTextToUnicode();
        }
      }
    }

    if (mBufferPos >= mTransformBuf.mBufferLen) {
      if (NS_FAILED(mTransformBuf.GrowBy(128))) {
        // Out of space: truncate the text.
        break;
      }
    }

    // The write position is always mBufferPos elements into the buffer,
    // measured in bytes or PRUnichars according to the current mode.
    if (TransformedTextIsAscii()) {
      ((unsigned char*)mTransformBuf.GetBuffer())[mBufferPos] = ch;
    } else {
      mTransformBuf.GetBuffer()[mBufferPos] = PRUnichar(ch);
    }
    mBufferPos++;
  }

  *aWordLen = mBufferPos - startBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
PRInt32* aWordLen,
PRBool* aWasTransformed)
{
const nsTextFragment* frag = mFrag;
const PRUnichar* cp0 = frag->Get2b();
PRInt32 fragLen = frag->GetLength();
PRInt32 offset = mOffset;
PRUnichar firstChar = frag->CharAt(offset++);
if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
// Only evaluate complex breaking logic if there are more characters
// beyond the first to look at.
PRInt32 numChars = 1;
if (offset < fragLen) {
const PRUnichar* cp = cp0 + offset;
PRBool breakBetween = PR_FALSE;
if (aForLineBreak) {
mLineBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
else {
mWordBreaker->BreakInBetween(&firstChar, 1, cp, (fragLen-offset), &breakBetween);
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
// don't transform the first character until after BreakInBetween is called
// Kipp originally did this at the top of the function, which was too early.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
if (NS_FAILED(rv)) {
*aWordLen = 0;
return offset - 1;
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
if (!breakBetween) {
// Find next position
PRBool tryNextFrag;
PRUint32 next;
if (aForLineBreak) {
mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
1998-10-22 00:05:31 +04:00
}
else {
mWordBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
}
numChars = (PRInt32) (next - (PRUint32) offset) + 1;
// Since we know the number of characters we're adding grow the buffer
// now before we start copying
nsresult rv = mTransformBuf.GrowTo(mBufferPos + numChars);
if (NS_FAILED(rv)) {
numChars = mTransformBuf.GetBufferLength() - mBufferPos;
}
offset += numChars - 1;
// 1. convert nbsp into space
// 2. check for discarded characters
// 3. check mHasMultibyte flag
// 4. copy buffer
PRUnichar* bp = &mTransformBuf.mBuffer[mBufferPos];
const PRUnichar* end = cp + numChars - 1;
while (cp < end) {
PRUnichar ch = *cp++;
if (CH_NBSP == ch) {
ch = ' ';
}
else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
// Strip discarded characters from the transformed output
numChars--;
continue;
}
if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
*bp++ = ch;
mBufferPos++;
1998-10-22 00:05:31 +04:00
}
}
1998-10-22 00:05:31 +04:00
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
else
{ // transform the first character
// we do this here, rather than at the top of the function (like Kipp originally had it)
// because if we must call BreakInBetween, then we must do so before the transformation
// this is the case where BreakInBetween does not need to be called at all.
// see bug 14280
if (CH_NBSP == firstChar) {
firstChar = ' ';
*aWasTransformed = PR_TRUE;
}
nsresult rv = mTransformBuf.GrowTo(mBufferPos + 1);
if (NS_FAILED(rv)) {
*aWordLen = 0;
return offset - 1;
}
bug 14280 nsTextTransformer.cpp. I moved where we translate the nbsp to a (ascii 32 space character) until after the i18n routines are called, so they can properly account for the space as non-breaking and therefore part of the first word in the block. bug 39901 and 38396 nsHTMLImageLoader.*, nsImageFrame.cpp I backed out the bad fix for 38396, and put in a new fix where I store a little state in the image loader flags for cases where the image gets an unconstrained reflow and has %-based width. This does not handle %-based min-width or max-width, that would be a separate bug that I'll file shortly. But this fixes the vast majority of real cases out there. bug 18754 nsHRFrame.cpp, quirks.css, nsCSSFrameConstructor.cpp, last part of nsLineLayout.cpp in quirks mode, I changed HR from a block element to a replaced inline element that acts like a block, using generated content to get newlines before and after the HR. This isn't ideal, but it gets us backwards compatibility, and ian and dbaron have blessed the approach. bug 50257 nsLineLayout.cpp Did a couple of things in here: * The actual fix is controlled by FIX_BUG_50257 #define symbol. This basically says that an break (BR) will always fit on a line. A more general solution would probably be to round up to the nearest pixel, and if the thing is less than a pixel make it fit on a line. This is a wimpier, safer solution. * I noticed that the way we got the compatibility mode was way out of date, very wasteful. So I fixed that. * I noticed that there were a bunch of redundant SetFlag calls. Since the flag variable is initialized to 0, setting a flag to 0 on a newly created object is a waste. nsBlockFrame.cpp -- just added a comment to some odd looking code, to make sure no one comes along later and breaks it
2000-09-12 01:15:02 +04:00
mTransformBuf.mBuffer[mBufferPos++] = firstChar;
}
*aWordLen = numChars;
return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
//
// Scans forward from mOffset over whitespace other than tab and newline,
// appending one ' ' (as a PRUnichar) to the transform buffer per whitespace
// character. Discarded characters are skipped without ending the run. If
// the buffer cannot grow, the run is cut short.
//
// aWordLen - OUT: number of spaces appended.
// Returns the offset at which scanning stopped.
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
  PRInt32 offset = mOffset;
  PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* endbp = mTransformBuf.GetBufferEnd();
  PRInt32 prevBufferPos = mBufferPos;

  for (; offset < fragLen; offset++) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    PRUnichar ch = frag->CharAt(offset);
    if (!XP_IS_SPACE(ch) || (ch == '\t') || (ch == '\n')) {
      if (IS_DISCARDED(ch)) {
        // Keep looping if this is a discarded character
        continue;
      }
      break;
    }
    if (bp == endbp) {
      PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      // The buffer may have moved; re-seat both cursors.
      bp = mTransformBuf.GetBuffer() + oldLength;
      endbp = mTransformBuf.GetBufferEnd();
    }
    *bp++ = ' ';
    mBufferPos++;
  }

  *aWordLen = mBufferPos - prevBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// Scans forward from mOffset up to (but not including) the next tab or
// newline, appending the characters (as PRUnichars) to the transform buffer.
// NBSP is stored as ' ' and sets *aWasTransformed; discarded characters are
// skipped; characters above MAX_UNIBYTE set the has-multibyte flag. If the
// buffer cannot grow, the data is cut short.
//
// aWordLen - OUT: number of characters appended.
// Returns the offset at which scanning stopped.
PRInt32
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen,
                                 PRBool* aWasTransformed)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
  PRInt32 offset = mOffset;
  PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* endbp = mTransformBuf.GetBufferEnd();
  PRInt32 prevBufferPos = mBufferPos;

  for (; offset < fragLen; offset++) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    PRUnichar ch = frag->CharAt(offset);
    if ((ch == '\t') || (ch == '\n')) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
    if (bp == endbp) {
      PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      // The buffer may have moved; re-seat both cursors.
      bp = mTransformBuf.GetBuffer() + oldLength;
      endbp = mTransformBuf.GetBufferEnd();
    }
    *bp++ = ch;
    mBufferPos++;
  }

  *aWordLen = mBufferPos - prevBufferPos;
  return offset;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// 1-byte counterpart of ScanPreData_F: scans forward from mOffset up to
// (but not including) the next tab or newline, appending the characters,
// widened to PRUnichar, to the transform buffer. NBSP is stored as ' ' and
// sets *aWasTransformed; discarded characters are skipped; characters above
// MAX_UNIBYTE set the has-multibyte flag.
//
// aWordLen - OUT: number of characters appended.
// Returns the offset of the first character that was not consumed.
PRInt32
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen,
                                      PRBool* aWasTransformed)
{
  const nsTextFragment* frag = mFrag;
  PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* endbp = mTransformBuf.GetBufferEnd();
  const unsigned char* cp = (const unsigned char*) frag->Get1b();
  const unsigned char* end = cp + frag->GetLength();
  PRInt32 prevBufferPos = mBufferPos;
  cp += mOffset;

  while (cp < end) {
    PRUnichar ch = (PRUnichar) *cp++;
    if ((ch == '\t') || (ch == '\n')) {
      // Leave the terminator for the caller.
      cp--;
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
    if (bp == endbp) {
      PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input.
        // Un-read the character that could not be stored so it is not
        // reported as consumed, matching ScanPreData_F.
        cp--;
        break;
      }
      // The buffer may have moved; re-seat both cursors.
      bp = mTransformBuf.GetBuffer() + oldLength;
      endbp = mTransformBuf.GetBufferEnd();
    }
    *bp++ = ch;
    mBufferPos++;
  }

  *aWordLen = mBufferPos - prevBufferPos;
  return cp - ((const unsigned char*)frag->Get1b());
}
//----------------------------------------
static void
AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen)
{
while (aWordLen-- > 0) {
*aText = tolower(*aText);
aText++;
}
}
static void
AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen)
{
while (aWordLen-- > 0) {
*aText = toupper(*aText);
aText++;
}
}
// U+00DF, LATIN SMALL LETTER SHARP S (German sz ligature).
#define kSzlig 0x00DF

// Returns how many of the first len characters of aText are kSzlig.
static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len)
{
  PRInt32 found = 0;
  for (PRInt32 idx = 0; idx < len; ++idx) {
    if (kSzlig == aText[idx]) {
      ++found;
    }
  }
  return found;
}
// Expands every kSzlig among the first len characters of aText into "SS",
// in place. szCnt must be the number of kSzlig characters in that range and
// aText must have room for len + szCnt characters.
//
// The text is rewritten from the end towards the start so nothing is
// overwritten before it is read; once the read and write positions meet,
// the remaining prefix is already in its final place.
static void ReplaceGermanSzligToSS(PRUnichar* aText, PRInt32 len, PRInt32 szCnt)
{
  PRInt32 readIdx = len - 1;
  PRInt32 writeIdx = readIdx + szCnt;

  while ((readIdx != writeIdx) && (readIdx >= 0)) {
    const PRUnichar ch = aText[readIdx--];
    if (kSzlig == ch) {
      aText[writeIdx--] = PRUnichar('S');
      aText[writeIdx--] = PRUnichar('S');
    } else {
      aText[writeIdx--] = ch;
    }
  }
}
// Applies the language specific character substitution to aText in place.
//
// For the Japanese transform type every BACKSLASH (0x5C) becomes YEN SIGN
// (0xA5); for the Korean type it becomes WON SIGN (0x20A9). Any other
// transform type leaves the text untouched. Each substitution sets the
// has-multibyte flag and *aWasTransformed.
//
// (Mapping TILDE to OVERLINE for Japanese was considered but left out,
// since some systems may not have fonts with the OVERLINE glyph.)
void
nsTextTransformer::LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen,
                                             PRBool* aWasTransformed)
{
  PRUnichar currencySign;
  if (eLanguageSpecificTransformType_Japanese ==
      mLanguageSpecificTransformType) {
    currencySign = 0xA5;    // YEN SIGN
  }
  else if (eLanguageSpecificTransformType_Korean ==
           mLanguageSpecificTransformType) {
    currencySign = 0x20A9;  // WON SIGN
  }
  else {
    return;
  }

  for (PRInt32 idx = 0; idx < aLen; ++idx) {
    if (aText[idx] != 0x5C) {  // not a BACKSLASH
      continue;
    }
    aText[idx] = currencySign;
    SetHasMultibyte(PR_TRUE);
    *aWasTransformed = PR_TRUE;
  }
}
// Produces the next "word" (a run of text or a run of whitespace) starting
// at mOffset, applies the text-transform style and any language specific
// transform to it, and advances mOffset past the content consumed.
//
// aInWord             - PR_TRUE when the text continues a word begun
//                       earlier; capitalization is then not applied to the
//                       first character.
// aWordLenResult      - OUT: length of the transformed word.
// aContentLenResult   - OUT: number of content characters consumed.
// aIsWhiteSpaceResult - OUT: PR_TRUE if the word is whitespace.
// aWasTransformed     - OUT: PR_TRUE if the output differs from the content.
// aResetTransformBuf  - when true, output starts at the beginning of the
//                       transform buffer; otherwise it is appended.
// aForLineBreak       - PR_TRUE to break for line layout, PR_FALSE to break
//                       by word (which forces normal mode).
//
// Returns a pointer to the word inside the transform buffer (1-byte
// characters when TransformedTextIsAscii(), PRUnichars otherwise), or
// nsnull when there is no content left.
PRUnichar*
nsTextTransformer::GetNextWord(PRBool aInWord,
                               PRInt32* aWordLenResult,
                               PRInt32* aContentLenResult,
                               PRBool* aIsWhiteSpaceResult,
                               PRBool* aWasTransformed,
                               PRBool aResetTransformBuf,
                               PRBool aForLineBreak)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
  PRInt32 offset = mOffset;
  PRInt32 wordLen = 0;
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;

  // Initialize OUT parameter
  *aWasTransformed = PR_FALSE;

  // See if we should reset the current buffer position back to the
  // beginning of the buffer
  if (aResetTransformBuf) {
    mBufferPos = 0;
    SetTransformedTextIsAscii(LeaveAsAscii());
  }

  // Buffer index where this word's output begins. This is a position, not
  // a flag, so it is held in a PRInt32.
  PRInt32 prevBufferPos = mBufferPos;

  // Fix word breaking problem w/ PREFORMAT and PREWRAP
  // for word breaking, we should really go to the normal code
  if((! aForLineBreak) && (eNormal != mMode))
    mMode = eNormal;

  while (offset < fragLen) {
    PRUnichar firstChar = frag->CharAt(offset);

    // Eat up any discarded characters before dispatching
    if (IS_DISCARDED(firstChar)) {
      offset++;
      continue;
    }

    switch (mMode) {
      default:
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_F();
          if (firstChar != ' ') {
            *aWasTransformed = PR_TRUE;
          }
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (CH_NBSP == firstChar && !aForLineBreak) {
          wordLen = 1;
          isWhitespace = PR_TRUE;
          *aWasTransformed = PR_TRUE;

          // Make sure we have enough room in the transform buffer
          if (mBufferPos >= mTransformBuf.mBufferLen) {
            mTransformBuf.GrowBy(128);
          }

          offset++;
          if (TransformedTextIsAscii()) {
            ((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' ';
          } else {
            mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
          }
        }
        else if (frag->Is2b()) {
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreformatted:
        if (('\n' == firstChar) || ('\t' == firstChar)) {
          // Make sure we have enough room in the transform buffer; when
          // output is being appended the buffer may already be full.
          if (mBufferPos >= mTransformBuf.mBufferLen) {
            mTransformBuf.GrowBy(128);
          }
          mTransformBuf.mBuffer[mBufferPos++] = firstChar;
          offset++;
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
          offset = ScanPreData_F(&wordLen, aWasTransformed);
        }
        else {
          offset = ScanPreAsciiData_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreWrap:
        if (XP_IS_SPACE(firstChar)) {
          if (('\n' == firstChar) || ('\t' == firstChar)) {
            // Make sure we have enough room in the transform buffer; when
            // output is being appended the buffer may already be full.
            if (mBufferPos >= mTransformBuf.mBufferLen) {
              mTransformBuf.GrowBy(128);
            }
            mTransformBuf.mBuffer[mBufferPos++] = firstChar;
            offset++;
            wordLen = 1;
          }
          else {
            offset = ScanPreWrapWhiteSpace_F(&wordLen);
          }
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;
    }

    if (TransformedTextIsAscii()) {
      unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos;

      if (!isWhitespace) {
        switch (mTextTransform) {
        case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
          // Only capitalize when this text starts a word, matching the
          // !aInWord argument given to ToTitle() on the Unicode path.
          if (!aInWord)
            *wordPtr = toupper(*wordPtr);
          break;
        case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
          AsciiToLowerCase(wordPtr, wordLen);
          break;
        case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
          AsciiToUpperCase(wordPtr, wordLen);
          break;
        }
        NS_ASSERTION(mLanguageSpecificTransformType ==
                     eLanguageSpecificTransformType_None,
                     "should not be ASCII for language specific transforms");
      }
      result = (PRUnichar*)wordPtr;

    } else {
      result = &mTransformBuf.mBuffer[prevBufferPos];

      if (!isWhitespace) {
        // NOTE(review): the sz-ligature expansions below lengthen the word
        // but do not advance mBufferPos; confirm that callers appending
        // further words (aResetTransformBuf false) tolerate that.
        switch (mTextTransform) {
        case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
          gCaseConv->ToTitle(result, result, wordLen, !aInWord);
          // if the first character is szlig
          if(kSzlig == *result)
          {
            if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) {
              mTransformBuf.GrowBy(128);
              result = &mTransformBuf.mBuffer[prevBufferPos];
            }
            // Shift everything after the first character up by one to
            // make room for the second 'S'.
            PRUnichar* src = result + wordLen;
            while(src>result)
            {
              *(src+1) = *src;
              src--;
            }
            result[0] = PRUnichar('S');
            result[1] = PRUnichar('S');
            wordLen++;
          }
          break;
        case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
          gCaseConv->ToLower(result, result, wordLen);
          break;
        case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
          {
            gCaseConv->ToUpper(result, result, wordLen);

            // first we search for German Szlig
            PRInt32 szligCnt = CountGermanSzlig(result, wordLen);
            if(szligCnt > 0) {
              // Make sure we have enough room in the transform buffer
              if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen)
              {
                mTransformBuf.GrowBy(128);
                result = &mTransformBuf.mBuffer[prevBufferPos];
              }
              ReplaceGermanSzligToSS(result, wordLen, szligCnt);
              wordLen += szligCnt;
            }
          }
          break;
        }
        if (mLanguageSpecificTransformType !=
            eLanguageSpecificTransformType_None) {
          LanguageSpecificTransform(result, wordLen, aWasTransformed);
        }
      }
    }

    // One word per call: the loop only repeats to skip discarded
    // characters in front of the word.
    break;
  }

  *aWordLenResult = wordLen;
  *aContentLenResult = offset - mOffset;
  *aIsWhiteSpaceResult = isWhitespace;

  // If the word length doesn't match the content length then we transformed
  // the text
  if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) ||
      (*aWordLenResult != *aContentLenResult)) {
    *aWasTransformed = PR_TRUE;
  }

  mOffset = offset;

  return result;
}
//----------------------------------------------------------------------
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
//
// Backward counterpart of the whitespace scan: walks from mOffset - 1
// towards the start of the fragment over whitespace (discarded characters
// are absorbed into the run), then stores a single ' ' in the last slot of
// the transform buffer. Returns the offset of the character that ended the
// run, or -1 if the start of the fragment was reached.
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_B()
{
  const nsTextFragment* frag = mFrag;
  PRInt32 pos = mOffset - 1;

  for (; pos >= 0; --pos) {
    const PRUnichar ch = frag->CharAt(pos);
    // Stop at the first character that is neither whitespace nor
    // discardable.
    if (!XP_IS_SPACE(ch) && !IS_DISCARDED(ch)) {
      break;
    }
  }

  mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
  return pos;
}
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
//
// Backward text scan: walks from mOffset - 1 towards the start of the
// fragment until whitespace (NBSP counts as whitespace here, since it is
// mapped to ' ' before the test), writing the characters into the transform
// buffer from its last slot downwards so that the word ends at the end of
// the buffer. Discarded characters are skipped; characters above
// MAX_UNIBYTE set the has-multibyte flag.
//
// aWordLen - OUT: number of characters stored.
// Returns the offset of the character that ended the scan, or -1 if the
// start of the fragment was reached.
PRInt32
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 offset = mOffset;
  PRUnichar* bp = mTransformBuf.GetBufferEnd();
  PRUnichar* startbp = mTransformBuf.GetBuffer();

  while (--offset >= 0) {
    PRUnichar ch = frag->CharAt(offset);
    if (CH_NBSP == ch) {
      ch = ' ';
    }
    if (XP_IS_SPACE(ch)) {
      break;
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
    if (bp == startbp) {
      // The buffer is completely filled with the word so far. It is built
      // at the tail, so the old contents must end up at the END of the
      // grown buffer (aCopyToHead == PR_FALSE). Growing to exactly twice
      // the old length places them at offset oldLength, which is the tail.
      PRInt32 oldLength = mTransformBuf.mBufferLen;
      nsresult rv = mTransformBuf.GrowTo(oldLength * 2, PR_FALSE);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      bp = mTransformBuf.GetBufferEnd() - oldLength;
      startbp = mTransformBuf.GetBuffer();
    }
    *--bp = ch;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - bp;
  return offset;
}
// Backward scan over one word of text in a 2b (PRUnichar) fragment.
//
// The character just before mOffset always becomes the last character of
// the word and is stored in the final slot of the transform buffer. If it
// is not at the very start of the fragment, the line breaker (when
// aForLineBreak is true) or the word breaker (otherwise) decides whether
// the word extends further back; if so, the preceding characters are copied
// in front of it, converting non-breaking spaces to ' ' and dropping
// discardable characters.
//
// aForLineBreak  selects mLineBreaker (PR_TRUE) or mWordBreaker (PR_FALSE)
// aWordLen       out: number of characters stored in the transform buffer
// Returns the offset one before the first consumed character; like the
// other backward scanners it overshoots by one and the caller adds 1.
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
                                           PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  const PRUnichar* cp0 = frag->Get2b();
  PRInt32 offset = mOffset - 1;

  PRUnichar firstChar = frag->CharAt(offset);
  mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
  if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

  PRInt32 numChars = 1;
  if (offset > 0) {
    const PRUnichar* cp = cp0 + offset;
    PRBool breakBetween = PR_FALSE;
    if (aForLineBreak) {
      mLineBreaker->BreakInBetween(cp0, offset + 1,
                                   mTransformBuf.GetBufferEnd()-1, 1,
                                   &breakBetween);
    }
    else {
      mWordBreaker->BreakInBetween(cp0, offset + 1,
                                   mTransformBuf.GetBufferEnd()-1, 1,
                                   &breakBetween);
    }

    if (breakBetween) {
      // The word is just firstChar. Step past it so the returned offset
      // overshoots by one like every other exit path; without this the
      // caller's "+ 1" lands back on the original mOffset and the
      // transformer reports a one-character word with zero content length
      // and never makes progress.
      offset--;
    }
    else {
      // Find next position. The out-parameters are pre-set so that a
      // breaker call that returns without writing them degrades to a
      // one-character word instead of using indeterminate values.
      PRBool tryPrevFrag = PR_FALSE;
      PRUint32 prev = (PRUint32) offset;
      if (aForLineBreak) {
        mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
      }
      else {
        mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
      }
      numChars = (PRInt32) ((PRUint32) offset - prev) + 1;

      // Grow buffer before copying
      // NOTE(review): firstChar was already stored in the last slot; this
      // assumes GrowTo() keeps it there when it reallocates -- confirm
      // against GrowTo()'s default aCopyToHead value.
      nsresult rv = mTransformBuf.GrowTo(numChars);
      if (NS_FAILED(rv)) {
        // Could not grow: take only as much as the current buffer holds
        numChars = mTransformBuf.GetBufferLength();
      }

      // 1. convert nbsp into space
      // 2. check mHasMultibyte flag
      // 3. copy buffer
      // bp starts on the slot holding firstChar; the remaining
      // numChars - 1 characters are written in front of it.
      PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
      const PRUnichar* end = cp - numChars + 1;
      while (cp > end) {
        PRUnichar ch = *--cp;
        if (CH_NBSP == ch) {
          ch = ' ';
        }
        else if (IS_DISCARDED(ch)) {
          continue;
        }
        if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
        *--bp = ch;
      }

      // Recompute offset and numChars in case we stripped something
      offset = offset - numChars;
      numChars = mTransformBuf.GetBufferEnd() - bp;
    }
  }
  else
    offset--;

  *aWordLen = numChars;
  return offset;
}
// Backward scan over a run of white space in pre-wrap mode.
// Every white-space character other than tab and newline is stored as a
// ' ' (written from the end of the transform buffer towards its start).
// Discardable characters inside the run are skipped without ending it; a
// tab, a newline or any other character ends the run.
// On return *aWordLen holds the number of spaces stored; the return value
// is the offset at which the scan stopped.
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
{
  PRUnichar* writePos = mTransformBuf.GetBufferEnd();
  PRUnichar* bufStart = mTransformBuf.GetBuffer();

  PRInt32 pos = mOffset - 1;
  for (; pos >= 0; --pos) {
    const PRUnichar c = mFrag->CharAt(pos);
    const PRBool isPlainSpace =
      XP_IS_SPACE(c) && (c != '\t') && (c != '\n');
    if (!isPlainSpace) {
      // Keep looping if this is a discarded character
      if (IS_DISCARDED(c)) {
        continue;
      }
      break;
    }

    if (writePos == bufStart) {
      // The buffer is full; enlarge it and re-derive both pointers.
      const PRInt32 lengthBeforeGrow = mTransformBuf.mBufferLen;
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      writePos = mTransformBuf.GetBufferEnd() - lengthBeforeGrow;
      bufStart = mTransformBuf.GetBuffer();
    }
    *--writePos = ' ';
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - writePos;
  return pos;
}
// Backward scan over a run of preformatted data.
// Everything up to (but not including) the previous tab or newline is
// written from the end of the transform buffer towards its start.
// Non-breaking spaces are stored as ' ', discardable characters are
// skipped, and the multibyte flag is raised for anything above
// MAX_UNIBYTE.
// On return *aWordLen holds the number of characters stored; the return
// value is the offset at which the scan stopped.
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
PRInt32
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
{
  PRUnichar* writePos = mTransformBuf.GetBufferEnd();
  PRUnichar* bufStart = mTransformBuf.GetBuffer();

  PRInt32 pos = mOffset - 1;
  for (; pos >= 0; --pos) {
    PRUnichar c = mFrag->CharAt(pos);
    if (('\t' == c) || ('\n' == c)) {
      break;
    }
    if (CH_NBSP == c) {
      c = ' ';
    }
    else if (IS_DISCARDED(c)) {
      continue;
    }
    if (c > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

    if (writePos == bufStart) {
      // The buffer is full; enlarge it and re-derive both pointers.
      const PRInt32 lengthBeforeGrow = mTransformBuf.mBufferLen;
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input
        // NOTE(review): the sibling backward scanners break here without
        // adjusting the offset -- confirm this increment is intended.
        pos++;
        break;
      }
      writePos = mTransformBuf.GetBufferEnd() - lengthBeforeGrow;
      bufStart = mTransformBuf.GetBuffer();
    }
    *--writePos = c;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - writePos;
  return pos;
}
//----------------------------------------
// Return the word (or white-space run) that ends just before mOffset and
// move mOffset back to its first character.
//
// aInWord              passed (negated) to ToTitle() when capitalizing
// aWordLenResult       out: number of characters in the returned word
// aContentLenResult    out: number of fragment characters consumed
// aIsWhiteSpaceResult  out: PR_TRUE when the result is white space
// aForLineBreak        PR_TRUE to break for line layout; PR_FALSE for word
//                      breaking, which forces the transformer into normal
//                      mode
//
// Returns a pointer into the tail of the transform buffer, or nsnull when
// nothing but discarded characters (or nothing at all) precedes mOffset.
PRUnichar*
nsTextTransformer::GetPrevWord(PRBool aInWord,
                               PRInt32* aWordLenResult,
                               PRInt32* aContentLenResult,
                               PRBool* aIsWhiteSpaceResult,
                               PRBool aForLineBreak)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 offset = mOffset;
  PRInt32 wordLen = 0;
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;

  // Fix word breaking problem w/ PREFORMAT and PREWRAP
  // for word breaking, we should really go to the normal code
  if ((!aForLineBreak) && (eNormal != mMode))
    mMode = eNormal;

  while (--offset >= 0) {
    PRUnichar firstChar = frag->CharAt(offset);

    // Eat up any discarded characters before dispatching
    if (IS_DISCARDED(firstChar)) {
      continue;
    }

    switch (mMode) {
      default:
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_B();
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (CH_NBSP == firstChar && !aForLineBreak) {
          // For word breaking a lone nbsp is reported as one space
          wordLen = 1;
          isWhitespace = PR_TRUE;
          mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
          offset--;
        } else if (frag->Is2b()) {
          offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
        }
        else {
          offset = ScanNormalAsciiText_B(&wordLen);
        }
        break;

      case ePreformatted:
        if (('\n' == firstChar) || ('\t' == firstChar)) {
          mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
          offset--;  // make sure we overshoot
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else {
          offset = ScanPreData_B(&wordLen);
        }
        break;

      case ePreWrap:
        if (XP_IS_SPACE(firstChar)) {
          if (('\n' == firstChar) || ('\t' == firstChar)) {
            mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = firstChar;
            offset--;  // make sure we overshoot
            wordLen = 1;
          }
          else {
            offset = ScanPreWrapWhiteSpace_B(&wordLen);
          }
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
          offset = ScanNormalUnicodeText_B(aForLineBreak, &wordLen);
        }
        else {
          offset = ScanNormalAsciiText_B(&wordLen);
        }
        break;
    }

    // Backwards scanning routines *always* overshoot by one for the
    // returned offset value.
    offset = offset + 1;

    // The scanners fill the transform buffer from its end, so the word
    // occupies the last wordLen slots.
    result = mTransformBuf.GetBufferEnd() - wordLen;

    if (!isWhitespace) {
      switch (mTextTransform) {
        case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
          gCaseConv->ToTitle(result, result, wordLen, !aInWord);
          break;
        case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
          gCaseConv->ToLower(result, result, wordLen);
          break;
        case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
          gCaseConv->ToUpper(result, result, wordLen);
          break;
      }
    }
    break;
  }

  // The loop only leaves offset negative when it ran off the front of the
  // fragment without finding a word (nothing, or only discarded
  // characters, preceded mOffset). Clamp it so the content length is not
  // over-counted by one and mOffset is not pushed below the start of the
  // fragment.
  if (offset < 0) {
    offset = (mOffset < 0) ? mOffset : 0;
  }

  *aWordLenResult = wordLen;
  *aContentLenResult = mOffset - offset;
  *aIsWhiteSpaceResult = isWhitespace;

  mOffset = offset;
  return result;
}
//----------------------------------------------------------------------
// Self test logic for this class. This will (hopefully) make sure
// that the forward and backward word iterator methods continue to
// function as people change things...
#ifdef DEBUG
// Expected results for one test text in one white-space mode.
struct SelfTestSection {
// Number of entries in data
int length;
// Expected word lengths, in forward order; SelfTest() treats a first
// entry of 0 as "no words expected"
int* data;
};
// Number of white-space modes each test text is run in (see preModeValue)
#define NUM_MODES 3
// One self-test case: a text plus its expected results per mode.
struct SelfTestData {
// Zero-terminated text to feed to the transformer
const PRUnichar* text;
// Expected results, indexed in the same order as preModeValue
SelfTestSection modes[NUM_MODES];
};
// White-space style value handed to Init2() for each mode index used by
// SelfTest(); the order matches SelfTestData::modes.
static PRUint8 preModeValue[NUM_MODES] = {
NS_STYLE_WHITESPACE_NORMAL,
NS_STYLE_WHITESPACE_PRE,
NS_STYLE_WHITESPACE_MOZ_PRE_WRAP
};
// Self-test fixtures. Each text is followed by the word lengths expected
// in normal, pre and pre-wrap mode respectively.

// Test 1: plain ASCII words separated by spaces and one tab.
static PRUnichar test1text[] = {
'o', 'n', 'c', 'e', ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test1Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test1PreResults[] = { 9, 1, 12 };
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
// Test 2: same shape as test 1 but with characters in the 0x80-0xFF range
// and a trailing space.
static PRUnichar test2text[] = {
0xF6, 'n', 'c', 'e', ' ', 0xFB, 'p', 'o', 'n', '\t',
0xE3, ' ', 's', 'h', 0xF3, 'r', 't', ' ', 't', 0xEE, 'm', 'e', ' ', 0
};
static int test2Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static int test2PreResults[] = { 9, 1, 13 };
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
// Test 3: contains a character above 255 (0x0152), so SelfTest() labels it
// a "unicode" test.
static PRUnichar test3text[] = {
0x0152, 'n', 'c', 'e', ' ', 'x', 'y', '\t', 'z', 'y', ' ', 0
};
static int test3Results[] = { 4, 1, 2, 1, 2, 1, };
static int test3PreResults[] = { 7, 1, 3, };
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1, };
// Test 4: test 1 with CH_SHY characters inserted inside a word and between
// two spaces; the expected lengths do not count them.
static PRUnichar test4text[] = {
'o', 'n', CH_SHY, 'c', 'e', ' ', CH_SHY, ' ', 'u', 'p', 'o', 'n', '\t',
'a', ' ', 's', 'h', 'o', 'r', 't', ' ', 't', 'i', 'm', 'e', 0
};
static int test4Results[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test4PreResults[] = { 10, 1, 12 };
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };
// Test 5: a text consisting of a single CH_SHY; the lone 0 entry marks
// "no words expected" in every mode.
static PRUnichar test5text[] = {
CH_SHY, 0
};
static int test5Results[] = { 0 };
static int test5PreResults[] = { 0 };
static int test5PreWrapResults[] = { 0 };
// Test 6 is compiled out.
#if 0
static PRUnichar test6text[] = {
0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
0x756a, 0x7d44, 0x300c, 'H', 'E', 'Y', '!', ' ', 'H', 'E', 'Y', '!',
'\t', 'H', 'E', 'Y', '!', 0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
0x30da, 0x30fc, 0x30b8, 0x3002, 0
};
static int test6Results[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
static int test6PreResults[] = { 20, 1, 13 };
static int test6PreWrapResults[] = { 1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
5, 1, 4, 1, 5,
1, 2, 1, 2, 2 };
#endif
// Table of all self-test cases. Each entry pairs a text with its expected
// results for the normal, pre and pre-wrap modes, in that order; the
// lengths are derived from the result arrays with sizeof.
static SelfTestData tests[] = {
{ test1text,
{ { sizeof(test1Results)/sizeof(int), test1Results, },
{ sizeof(test1PreResults)/sizeof(int), test1PreResults, },
{ sizeof(test1PreWrapResults)/sizeof(int), test1PreWrapResults, } }
},
{ test2text,
{ { sizeof(test2Results)/sizeof(int), test2Results, },
{ sizeof(test2PreResults)/sizeof(int), test2PreResults, },
{ sizeof(test2PreWrapResults)/sizeof(int), test2PreWrapResults, } }
},
{ test3text,
{ { sizeof(test3Results)/sizeof(int), test3Results, },
{ sizeof(test3PreResults)/sizeof(int), test3PreResults, },
{ sizeof(test3PreWrapResults)/sizeof(int), test3PreWrapResults, } }
},
{ test4text,
{ { sizeof(test4Results)/sizeof(int), test4Results, },
{ sizeof(test4PreResults)/sizeof(int), test4PreResults, },
{ sizeof(test4PreWrapResults)/sizeof(int), test4PreWrapResults, } }
},
{ test5text,
{ { sizeof(test5Results)/sizeof(int), test5Results, },
{ sizeof(test5PreResults)/sizeof(int), test5PreResults, },
{ sizeof(test5PreWrapResults)/sizeof(int), test5PreWrapResults, } }
},
// Test 6 is compiled out together with its data.
#if 0
{ test6text,
{ { sizeof(test6Results)/sizeof(int), test6Results, },
{ sizeof(test6PreResults)/sizeof(int), test6PreResults, },
{ sizeof(test6PreWrapResults)/sizeof(int), test6PreWrapResults, } }
},
#endif
};
// Number of entries in tests[]
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
1999-08-19 10:06:57 +04:00
// Run every entry of tests[] through the transformer in each white-space
// mode, once forwards with GetNextWord() and once backwards with
// GetPrevWord(), comparing the word lengths produced with the expected
// ones. Setting GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST in the environment
// makes the test print every word it sees. Calls NS_ABORT() if any
// comparison failed.
//
// aLineBreaker, aWordBreaker, aPresContext are handed straight to the
// nsTextTransformer constructor.
void
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
                            nsIWordBreaker* aWordBreaker,
                            nsIPresContext* aPresContext)
{
  PRBool gNoisy = PR_FALSE;
  if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
    gNoisy = PR_TRUE;
  }

  PRBool error = PR_FALSE;
  PRInt32 testNum = 0;
  SelfTestData* st = tests;
  SelfTestData* last = st + NUM_TESTS;
  for (; st < last; st++) {
    PRUnichar* bp;
    PRInt32 wordLen, contentLen;
    PRBool ws, transformed;

    // Only used to label the noisy output
    PRBool isAsciiTest = PR_TRUE;
    const PRUnichar* cp = st->text;
    while (*cp) {
      if (*cp > 255) {
        isAsciiTest = PR_FALSE;
        break;
      }
      cp++;
    }

    nsTextFragment frag(st->text);
    nsTextTransformer tx(aLineBreaker, aWordBreaker, aPresContext);

    for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
      int* const firstExpected = st->modes[preMode].data;
      int* const endExpected = firstExpected + st->modes[preMode].length;

      // Do forwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(uc2, stdout);
        printf("'\n");
      }
      tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);

      int* expectedResults = firstExpected;
      while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
        // Never read past the expectation array: a word beyond the
        // expected count is itself a failure.
        PRBool haveExpected = (expectedResults < endExpected) ? PR_TRUE : PR_FALSE;
        int expectedLen = haveExpected ? *expectedResults : -1;
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf("  '");
          fputs(tmp, stdout);
          printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, expectedLen, contentLen, tx.mOffset);
        }
        if (!haveExpected || (expectedLen != wordLen)) {
          error = PR_TRUE;
          break;
        }
        expectedResults++;
      }
      if (expectedResults != endExpected) {
        // Too few words, unless the leading 0 marks "no words expected"
        if (firstExpected[0] != 0) {
          error = PR_TRUE;
        }
      }

      // Do backwards test
      if (gNoisy) {
        nsAutoString uc2(st->text);
        printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
        fputs(uc2, stdout);
        printf("'\n");
      }
      // Initialize with the mode under test. Passing the normal
      // white-space value here only worked because Init2() leaves the mode
      // set up by the forwards pass untouched for that value.
      tx.Init2(&frag, frag.GetLength(), preModeValue[preMode],
               NS_STYLE_TEXT_TRANSFORM_NONE);

      expectedResults = endExpected;
      while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
        if (gNoisy) {
          nsAutoString tmp(bp, wordLen);
          printf("  '");
          fputs(tmp, stdout);
          printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
                 ws ? "yes" : "no",
                 wordLen, contentLen, tx.mOffset);
        }
        // Never step in front of the expectation array: a word beyond the
        // expected count is itself a failure.
        if (expectedResults == firstExpected) {
          error = PR_TRUE;
          break;
        }
        --expectedResults;
        if (*expectedResults != wordLen) {
          error = PR_TRUE;
          break;
        }
      }
      if (expectedResults != firstExpected) {
        // Too few words, unless the leading 0 marks "no words expected"
        if (firstExpected[0] != 0) {
          error = PR_TRUE;
        }
      }

      if (error) {
        fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
      }
      testNum++;
    }
  }
  if (error) {
    NS_ABORT();
  }
}
// Debug-only initializer used by SelfTest(): points the transformer at a
// text fragment without needing a frame.
//
// aFrag            fragment to scan
// aStartingOffset  initial value for mOffset; sanitized against 0 and the
//                  fragment length
// aWhiteSpace      NS_STYLE_WHITESPACE_* value selecting the scan mode
// aTextTransform   NS_STYLE_TEXT_TRANSFORM_* value stored as-is
//
// Always returns NS_OK.
nsresult
nsTextTransformer::Init2(const nsTextFragment* aFrag,
                         PRInt32 aStartingOffset,
                         PRUint8 aWhiteSpace,
                         PRUint8 aTextTransform)
{
  mFrag = aFrag;
  mTextTransform = aTextTransform;

  // Sanitize aStartingOffset
  // NOTE(review): this relies on NS_WARN_IF_FALSE evaluating to true when
  // its condition does not hold -- confirm against nsDebug.h.
  if (NS_WARN_IF_FALSE(aStartingOffset >= 0, "bad starting offset")) {
    aStartingOffset = 0;
  }
  else if (NS_WARN_IF_FALSE(aStartingOffset <= mFrag->GetLength(),
                            "bad starting offset")) {
    aStartingOffset = mFrag->GetLength();
  }
  mOffset = aStartingOffset;

  // Get the frames text style information
  switch (aWhiteSpace) {
    case NS_STYLE_WHITESPACE_PRE:
      mMode = ePreformatted;
      break;
    case NS_STYLE_WHITESPACE_MOZ_PRE_WRAP:
      mMode = ePreWrap;
      break;
    default:
      // Any other value leaves mMode exactly as it was.
      // NOTE(review): the mode is not reset to eNormal here -- confirm
      // that callers re-using a transformer expect that.
      break;
  }

  return NS_OK;
}
#endif /* DEBUG */