pjs/layout/generic/nsTextTransformer.cpp

673 строки
19 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
*/
#include "nsTextTransformer.h"
#include "nsIContent.h"
#include "nsIFrame.h"
#include "nsIStyleContext.h"
#include "nsITextContent.h"
#include "nsStyleConsts.h"
#include "nsILineBreaker.h"
#include "nsIWordBreaker.h"
#include "nsHTMLIIDs.h"
#include "nsIServiceManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsICaseConversion.h"
// Identifiers handed to nsServiceManager::GetService()/ReleaseService() to
// obtain and release the case-conversion service used for text-transform.
static NS_DEFINE_IID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
static NS_DEFINE_IID(kICaseConversionIID, NS_ICASECONVERSION_IID);
// Case-conversion service shared by all nsTextTransformer instances. It is
// fetched by the constructor when the count below goes from 0 to 1 and
// released by the destructor when the count drops back to 0.
static nsICaseConversion* gCaseConv = nsnull;
// Number of nsTextTransformer instances currently alive.
static nsrefcnt gCaseConvRefCnt = 0;
// Uncomment to make GetPrevWord() print every word it returns.
//#define DEBUG_GETPREVWORD
// XXX I'm sure there are other special characters
// Code point 160 (U+00A0, no-break space); it is replaced by a plain space
// in the words returned by GetNextWord()/GetPrevWord().
#define CH_NBSP 160
// Any character above this value causes mHasMultibyte to be set.
#define MAX_UNIBYTE 127
/**
 * Construct a transformer that starts out using the built-in word buffer.
 *
 * The first instance to be constructed (instance count going from 0 to 1)
 * also fetches the shared case-conversion service into gCaseConv.
 *
 * @param aLineBreaker breaker used by the word getters when called with
 *                     aForLineBreak set
 * @param aWordBreaker breaker used by the word getters otherwise
 */
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
                                     nsIWordBreaker* aWordBreaker)
  : mBuffer(mAutoWordBuffer),
    mBufferLength(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE),
    mHasMultibyte(PR_FALSE),
    mLineBreaker(aLineBreaker),
    mWordBreaker(aWordBreaker)
{
  // Always count this instance; only the first one acquires the service.
  const PRBool isFirstInstance = (0 == gCaseConvRefCnt);
  gCaseConvRefCnt++;
  if (isFirstInstance) {
    nsresult rv = nsServiceManager::GetService(kUnicharUtilCID,
                                               kICaseConversionIID,
                                               (nsISupports**)&gCaseConv);
    NS_ASSERTION(NS_SUCCEEDED(rv), "cannot get UnicharUtil");
    NS_ASSERTION(nsnull != gCaseConv, "cannot get UnicharUtil");
  }
}
/**
 * Free the word buffer if it was replaced by a heap allocation, and drop
 * the shared case-conversion service when the last instance goes away.
 */
nsTextTransformer::~nsTextTransformer()
{
  // mBuffer only needs freeing once GrowBuffer() has replaced the built-in
  // buffer with a heap allocation.
  if (mBuffer != mAutoWordBuffer) {
    delete [] mBuffer;
  }
  if (--gCaseConvRefCnt == 0) {
    // The constructor only asserts that the service was obtained, so
    // gCaseConv may still be null here; never hand a null service back to
    // the service manager.
    if (nsnull != gCaseConv) {
      nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
      gCaseConv = nsnull;
    }
  }
}
/**
 * Bind the transformer to a frame's text content and position it at
 * aStartingOffset.
 *
 * Records the content's text fragments, the total content length, the
 * fragment (and offset within it) that contains aStartingOffset, and the
 * white-space / text-transform style values read from aFrame.
 *
 * @param aFrame          frame whose text style data is read
 * @param aContent        content node; queried for nsITextContent
 * @param aStartingOffset content offset at which word extraction starts
 * @return always NS_OK. If aContent does not provide nsITextContent the
 *         transformer is left empty (zero fragments, zero length, offset 0)
 *         so that GetNextWord()/GetPrevWord() immediately report exhausted
 *         content instead of reading uninitialised members.
 */
nsresult
nsTextTransformer::Init(nsIFrame* aFrame,
                        nsIContent* aContent,
                        PRInt32 aStartingOffset)
{
  // Get the frames text content
  nsITextContent* tc;
  if (NS_OK != aContent->QueryInterface(kITextContentIID, (void**) &tc)) {
    // Not text content: establish a well-defined empty state. Offsets are
    // forced to 0 so that mOffset == mContentLength holds.
    mFrags = nsnull;
    mNumFrags = 0;
    mContentLength = 0;
    mStartingOffset = 0;
    mOffset = 0;
    mCurrentFrag = nsnull;
    mCurrentFragOffset = 0;
    return NS_OK;
  }
  tc->GetText(mFrags, mNumFrags);
  NS_RELEASE(tc);
  mStartingOffset = aStartingOffset;
  mOffset = mStartingOffset;

  // Walk the fragments once: total up the content length and, on the way,
  // remember the first fragment whose range contains aStartingOffset.
  // If none does, the first fragment at offset 0 remains selected.
  mCurrentFrag = mFrags;
  mCurrentFragOffset = 0;
  PRBool located = PR_FALSE;
  PRInt32 sum = 0;
  PRInt32 n = mNumFrags;
  const nsTextFragment* frag = mFrags;
  for (; --n >= 0; frag++) {
    PRInt32 fragLen = frag->GetLength();
    if (!located && (aStartingOffset < sum + fragLen)) {
      mCurrentFrag = frag;
      mCurrentFragOffset = aStartingOffset - sum;
      located = PR_TRUE;
    }
    sum += fragLen;
  }
  mContentLength = sum;

  // Starting exactly at the end of the content: park on the end of the
  // last fragment so that GetPrevWord() can walk backwards from there.
  if (mNumFrags && aStartingOffset == mContentLength){
    mCurrentFrag = mFrags + (mNumFrags -1);
    mCurrentFragOffset = mCurrentFrag->GetLength();
  }

  // Get the frames style and choose a transform proc
  const nsStyleText* styleText;
  aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) styleText);
  mPreformatted = (NS_STYLE_WHITESPACE_PRE == styleText->mWhiteSpace) ||
    (NS_STYLE_WHITESPACE_MOZ_PRE_WRAP == styleText->mWhiteSpace);
  mTextTransform = styleText->mTextTransform;
  return NS_OK;
}
/**
 * Replace the word buffer with a larger one (twice the current length, and
 * never fewer than 100 characters), preserving the current contents.
 *
 * @param aForNextWord when true the old contents are kept at the start of
 *                     the new buffer (word built forwards); when false
 *                     they are placed at the very end of the new buffer
 *                     (word built backwards from the buffer's tail)
 * @return PR_TRUE on success; PR_FALSE if the allocation returned null, in
 *         which case the existing buffer is left untouched
 */
PRBool
nsTextTransformer::GrowBuffer(PRBool aForNextWord)
{
  PRInt32 newLen = mBufferLength * 2;
  if (newLen <= 100) {
    newLen = 100;
  }
  PRUnichar* newBuffer = new PRUnichar[newLen];
  if (nsnull == newBuffer) {
    return PR_FALSE;
  }
  if (0 != mBufferLength) {
    // For backward building the old contents must end exactly at the end
    // of the new buffer. That is newLen - mBufferLength, which equals
    // mBufferLength only when the length was actually doubled (not when
    // the 100-character minimum applied).
    PRUnichar* dest = aForNextWord
      ? newBuffer
      : newBuffer + (newLen - mBufferLength);
    nsCRT::memcpy(dest, mBuffer, sizeof(PRUnichar) * mBufferLength);
  }
  // Free the previous buffer unless it is the built-in one; this matches
  // the ownership test used by the destructor.
  if (mBuffer != mAutoWordBuffer) {
    delete [] mBuffer;
  }
  mBuffer = newBuffer;
  mBufferLength = newLen;
  return PR_TRUE;
}
/**
 * Extract the next "word" from the content, moving forwards from the
 * current offset.
 *
 * The result is either a run of whitespace or a run of non-whitespace:
 *  - Whitespace, not preformatted: the whole run is consumed and returned
 *    as a single ' ' (word length 1, content length = run length).
 *  - Whitespace, preformatted: exactly one character is consumed. A tab is
 *    returned as is, a newline yields word length 0, anything else is
 *    returned as ' '.
 *  - Non-whitespace in a 2-byte fragment: the end of the word is decided
 *    by mLineBreaker (aForLineBreak true) or mWordBreaker (false).
 *  - Non-whitespace in a 1-byte fragment: the word extends to the next
 *    whitespace character.
 * No-break spaces are converted to ' ', mHasMultibyte is set when a
 * character above MAX_UNIBYTE is stored, and non-whitespace words are run
 * through the case converter according to mTextTransform.
 *
 * @param aInWord             its negation is passed to ToTitle() for the
 *                            capitalize transform
 * @param aWordLenResult      [out] number of characters in the result
 * @param aContentLenResult   [out] number of content characters consumed
 * @param aIsWhitespaceResult [out] whether the result is whitespace
 *                            (PR_FALSE when the content is exhausted)
 * @param aForLineBreak       selects the line breaker over the word breaker
 * @return mBuffer holding the word, or nsnull when the content is
 *         exhausted (both lengths are then 0)
 */
PRUnichar*
nsTextTransformer::GetNextWord(PRBool aInWord,
                               PRInt32& aWordLenResult,
                               PRInt32& aContentLenResult,
                               PRBool& aIsWhitespaceResult,
                               PRBool aForLineBreak)
{
  NS_PRECONDITION(mOffset <= mContentLength, "bad offset");
  NS_PRECONDITION(((nsnull != mLineBreaker)||(!aForLineBreak)), "null in line breaker");
  NS_PRECONDITION(((nsnull != mWordBreaker)||( aForLineBreak)), "null in word breaker");

  // See if the content has been exhausted
  if (mOffset == mContentLength) {
    aWordLenResult = 0;
    aContentLenResult = 0;
    // Give the caller a defined value for every out parameter.
    aIsWhitespaceResult = PR_FALSE;
    return nsnull;
  }

  PRUnichar* bp = mBuffer;
  PRUnichar* bufEnd = mBuffer + mBufferLength;
  const nsTextFragment* frag = mCurrentFrag;
  const nsTextFragment* lastFrag = mFrags + mNumFrags;
  PRInt32 wordLen = 1;
  PRInt32 contentLen = 1;

  // Set the isWhitespace flag by examining the next character in the
  // text fragment.
  PRInt32 offset = mCurrentFragOffset;
  PRUnichar firstChar;
  if (frag->Is2b()) {
    const PRUnichar* up = frag->Get2b();
    firstChar = up[offset];
  }
  else {
    const unsigned char* cp = (const unsigned char*) frag->Get1b();
    firstChar = PRUnichar(cp[offset]);
  }
  PRBool isWhitespace = XP_IS_SPACE(firstChar);
  offset++;
  if (isWhitespace) {
    if (mPreformatted) {
      if ('\t' == firstChar) {
        // Leave tab alone so that caller can expand it
      }
      else if ('\n' == firstChar) {
        // Advance content past newline but do not allow newline to
        // remain in the word.
        wordLen--;
      }
      else {
        firstChar = ' ';
      }
    }
    else {
      firstChar = ' ';
    }
  }
  else if (CH_NBSP == firstChar) {
    firstChar = ' ';
  }
  if (firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
  *bp++ = firstChar;

  // The first character has been consumed; step to the next fragment if
  // that was the last character of this one.
  if (offset == frag->GetLength()) {
    mCurrentFrag = ++frag;
    offset = 0;
  }
  mCurrentFragOffset = offset;

  // Preformatted whitespace is handed back one character at a time.
  if (isWhitespace && mPreformatted) {
    goto really_done;
  }

  PRInt32 numChars;
  while (frag < lastFrag) {
    PRInt32 fragLen = frag->GetLength();

    // Scan characters in this fragment that are the same kind as the
    // isWhitespace flag indicates.
    if (frag->Is2b()) {
      const PRUnichar* cp0 = frag->Get2b();
      const PRUnichar* end = cp0 + fragLen;
      const PRUnichar* cp = cp0 + offset;
      if (isWhitespace) {
        // Swallow the rest of the whitespace run; only the content length
        // grows because the run collapses to the single ' ' already stored.
        while (cp < end) {
          PRUnichar ch = *cp;
          if (XP_IS_SPACE(ch)) {
            cp++;
            continue;
          }
          numChars = (cp - offset) - cp0;
          contentLen += numChars;
          mCurrentFragOffset += numChars;
          goto done;
        }
        numChars = (cp - offset) - cp0;
        contentLen += numChars;
      }
      else {
        if(wordLen > 0) {
          nsresult res = NS_OK;
          PRBool breakBetween = PR_FALSE;
          // Ask whether a break falls between what is already in the
          // buffer and the remaining text of this fragment.
          if(aForLineBreak)
            res = mLineBreaker->BreakInBetween(mBuffer, wordLen,
                                               cp, (fragLen-offset), &breakBetween);
          else
            res = mWordBreaker->BreakInBetween(mBuffer, wordLen,
                                               cp, (fragLen-offset), &breakBetween);
          if ( breakBetween )
            goto done;

          PRBool tryNextFrag = PR_FALSE;
          PRUint32 next;
          // Find next position
          if(aForLineBreak)
            res = mLineBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
          else
            res = mWordBreaker->Next(cp0, fragLen, offset, &next, &tryNextFrag);
          if (NS_FAILED(res)) {
            // 'next' cannot be trusted when the breaker reports failure;
            // return what has been gathered so far.
            goto done;
          }
          numChars = (next - offset);

          // check buffer size before copy
          while((bp + numChars ) > bufEnd) {
            PRInt32 delta = bp - mBuffer;
            if(!GrowBuffer()) {
              goto done;
            }
            bp = mBuffer + delta;
            bufEnd = mBuffer + mBufferLength;
          }
          wordLen += numChars;
          mCurrentFragOffset += numChars;
          contentLen += numChars;
          end = cp + numChars;

          // 1. convert nbsp into space
          // 2. check mHasMultibyte flag
          // 3. copy buffer
          while(cp < end) {
            PRUnichar ch = *cp++;
            if (CH_NBSP == ch) ch = ' ';
            if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
            *bp++ = ch;
          }
          if(! tryNextFrag) {
            // can decide break position inside this TextFrag
            goto done;
          }
        }
      }
    }
    else {
      const unsigned char* cp0 = (const unsigned char*) frag->Get1b();
      const unsigned char* end = cp0 + fragLen;
      const unsigned char* cp = cp0 + offset;
      if (isWhitespace) {
        // Swallow the rest of the whitespace run (see the 2-byte case).
        while (cp < end) {
          PRUnichar ch = PRUnichar(*cp);
          if (XP_IS_SPACE(ch)) {
            cp++;
            continue;
          }
          numChars = (cp - offset) - cp0;
          contentLen += numChars;
          mCurrentFragOffset += numChars;
          goto done;
        }
        numChars = (cp - offset) - cp0;
        contentLen += numChars;
      }
      else {
        while (cp < end) {
          PRUnichar ch = PRUnichar(*cp);
          if (!XP_IS_SPACE(ch)) {
            if (CH_NBSP == ch) ch = ' ';
            if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
            cp++;

            // Store character in buffer; grow buffer if we have to
            NS_ASSERTION(bp < bufEnd, "whoops");
            *bp++ = ch;
            if (bp == bufEnd) {
              PRInt32 delta = bp - mBuffer;
              if (!GrowBuffer()) {
                // The lengths and fragment offset have not been advanced
                // for this fragment yet, so the characters copied from it
                // are simply not reported and will be read again.
                goto done;
              }
              bp = mBuffer + delta;
              bufEnd = mBuffer + mBufferLength;
            }
            continue;
          }
          numChars = (cp - offset) - cp0;
          wordLen += numChars;
          contentLen += numChars;
          mCurrentFragOffset += numChars;
          goto done;
        }
        numChars = (cp - offset) - cp0;
        wordLen += numChars;
        contentLen += numChars;
      }
    }

    // Advance to next text fragment
    frag++;
    mCurrentFrag = frag;
    mCurrentFragOffset = 0;
    offset = 0;
  }

 done:;
  // Apply text-transform to real words only. The constructor merely
  // asserts that the case-conversion service was obtained, so skip the
  // transform rather than dereference a null service.
  if (!isWhitespace && (nsnull != gCaseConv))
  {
    switch(mTextTransform)
    {
      case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
        gCaseConv->ToTitle(mBuffer, mBuffer, wordLen, !aInWord);
        break;
      case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
        gCaseConv->ToLower(mBuffer, mBuffer, wordLen );
        break;
      case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
        gCaseConv->ToUpper(mBuffer, mBuffer, wordLen );
        break;
      default:
        break;
    }
  }

 really_done:;
  mOffset += contentLen;
  NS_ASSERTION(mOffset <= mContentLength, "whoops");
  aWordLenResult = wordLen;
  aContentLenResult = contentLen;
  aIsWhitespaceResult = isWhitespace;
  return mBuffer;
}
/**
 * Extract the previous "word" from the content, moving backwards from the
 * current offset. This is the mirror image of GetNextWord(): the word is
 * assembled from the END of mBuffer towards its start, so the returned
 * pointer is &mBuffer[mBufferLength - wordLen] rather than mBuffer.
 *
 * Whitespace handling, no-break-space conversion, mHasMultibyte tracking
 * and text-transform follow the same rules as GetNextWord(); for 2-byte
 * fragments the start of the word is found with the breaker's Prev().
 *
 * @param aInWord             its negation is passed to ToTitle() for the
 *                            capitalize transform
 * @param aWordLenResult      [out] number of characters in the result
 * @param aContentLenResult   [out] number of content characters consumed
 * @param aIsWhitespaceResult [out] whether the result is whitespace
 *                            (PR_FALSE when the content is exhausted)
 * @param aForLineBreak       selects the line breaker over the word breaker
 * @return pointer to the word inside mBuffer, or nsnull when the offset is
 *         already 0 (both lengths are then 0)
 */
PRUnichar*
nsTextTransformer::GetPrevWord(PRBool aInWord,
                               PRInt32& aWordLenResult,
                               PRInt32& aContentLenResult,
                               PRBool& aIsWhitespaceResult,
                               PRBool aForLineBreak)
{
  NS_PRECONDITION(mOffset <= mContentLength, "bad offset");
  NS_PRECONDITION(((nsnull != mLineBreaker)||(!aForLineBreak)), "null in line breaker");
  NS_PRECONDITION(((nsnull != mWordBreaker)||( aForLineBreak)), "null in word breaker");

  // See if the content has been exhausted
  if (mOffset == 0) {
    aWordLenResult = 0;
    aContentLenResult = 0;
    // Give the caller a defined value for every out parameter.
    aIsWhitespaceResult = PR_FALSE;
    return nsnull;
  }

  // bp walks downwards from the last slot of the buffer; bufEnd is the
  // lower limit it must not cross.
  PRUnichar* bp = mBuffer+mBufferLength-1;
  PRUnichar* bufEnd = mBuffer ;
  const nsTextFragment* frag = mCurrentFrag;
  const nsTextFragment* lastFrag = mFrags;//1st is the last
  PRInt32 wordLen = 1;
  PRInt32 contentLen = 1;

  // Set the isWhitespace flag by examining the previous character in the
  // text fragment.
  PRInt32 offset = mCurrentFragOffset-1;
  PRUnichar firstChar;
  if (frag->Is2b()) {
    const PRUnichar* up = frag->Get2b();
    // A non-positive offset is clamped to the fragment's first character.
    if (offset > 0)
      firstChar = up[offset];
    else
      firstChar = up[0];
  }
  else {
    const unsigned char* cp = (const unsigned char*) frag->Get1b();
    // A non-positive offset is clamped to the fragment's first character.
    if (offset > 0)
      firstChar = PRUnichar(cp[offset]);
    else
      firstChar = PRUnichar(cp[0]);
  }
  PRBool isWhitespace = XP_IS_SPACE(firstChar);
  offset--;
  if (isWhitespace) {
    if (mPreformatted) {
      if ('\t' == firstChar) {
        // Leave tab alone so that caller can expand it
      }
      else if ('\n' == firstChar) {
        // Advance content past newline but do not allow newline to
        // remain in the word.
        wordLen--;
      }
      else {
        firstChar = ' ';
      }
    }
    else {
      firstChar = ' ';
    }
  }
  else if (CH_NBSP == firstChar) {
    firstChar = ' ';
  }
  if(firstChar > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
  *bp-- = firstChar;
  mCurrentFragOffset = offset +1;
  if (offset < 0) {
    if (mCurrentFrag == mFrags){
      // Reached the very start of the content. Leave through 'done' (not
      // 'really_done') so that a non-whitespace character still receives
      // the text-transform; whitespace is skipped there anyway.
      goto done;
    }
    mCurrentFrag = --frag;
    offset = mCurrentFrag->GetLength()-1;
  }

  // Preformatted whitespace is handed back one character at a time.
  if (isWhitespace && mPreformatted) {
    goto really_done;
  }

  PRInt32 numChars;
  do {
    // Scan characters in this fragment that are the same kind as the
    // isWhitespace flag indicates.
    if (frag->Is2b()) {
      const PRUnichar* cp0 = frag->Get2b();
      const PRUnichar* end = cp0;
      const PRUnichar* cp = cp0 + offset;
      if (isWhitespace) {
        // Swallow the rest of the whitespace run; only the content length
        // grows because the run collapses to the single ' ' already stored.
        while (cp > end) {
          PRUnichar ch = *cp;
          if (XP_IS_SPACE(ch)) {
            cp--;
            continue;
          }
          numChars = (cp0 + offset) - cp;
          contentLen += numChars;
          mCurrentFragOffset -= numChars;
          goto done;
        }
        numChars = (cp0 + offset) - cp;
        contentLen += numChars;
      }
      else {
        if(wordLen > 0) {
          nsresult res = NS_OK;
          PRBool breakBetween = PR_FALSE;
          // Ask whether a break falls between the text preceding the
          // current position and the word already gathered at the tail of
          // the buffer.
          if(aForLineBreak)
            res = mLineBreaker->BreakInBetween(
                                               cp0, offset+1,
                                               &(mBuffer[mBufferLength-wordLen]), wordLen,
                                               &breakBetween);
          else
            res = mWordBreaker->BreakInBetween(
                                               cp0, offset+1,
                                               &(mBuffer[mBufferLength-wordLen]), wordLen,
                                               &breakBetween);
          if ( breakBetween )
            goto done;

          PRBool tryPrevFrag = PR_FALSE;
          PRUint32 prev;
          // Find prev position
          if(aForLineBreak)
            res = mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
          else
            res = mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
          if (NS_FAILED(res)) {
            // 'prev' cannot be trusted when the breaker reports failure;
            // return what has been gathered so far.
            goto done;
          }
          numChars = (offset - prev)+1;

          // check buffer size before copy
          while((bp - numChars ) < bufEnd) {
            PRInt32 delta = (&(mBuffer[mBufferLength])) - bp -1 ;
            // The word lives at the tail of the buffer, so the existing
            // contents must be carried over to the tail of the new one:
            // pass PR_FALSE explicitly, as the 1-byte path below does.
            if(!GrowBuffer(PR_FALSE)) {
              goto done;
            }
            bp = (&(mBuffer[mBufferLength])) - delta - 1;
            bufEnd = mBuffer;
          }
          wordLen += numChars;
          mCurrentFragOffset -= numChars;
          contentLen += numChars;
          end = cp - numChars;

          // 1. convert nbsp into space
          // 2. check mHasMultibyte flag
          // 3. copy buffer
          while(cp > end) {
            PRUnichar ch = *cp--;
            if (CH_NBSP == ch) ch = ' ';
            if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
            *bp-- = ch;
          }
          if(! tryPrevFrag) {
            // can decide break position inside this TextFrag
            goto done;
          }
        }
      }
    }
    else {
      const unsigned char* cp0 = (const unsigned char*) frag->Get1b();
      const unsigned char* end = cp0;
      const unsigned char* cp = cp0 + offset;
      if (isWhitespace) {
        // Swallow the rest of the whitespace run (see the 2-byte case).
        while (cp > end) {
          PRUnichar ch = PRUnichar(*cp);
          if (XP_IS_SPACE(ch)) {
            cp--;
            continue;
          }
          numChars = (cp0 + offset) - cp;
          contentLen += numChars;
          mCurrentFragOffset -= numChars;
          goto done;
        }
        numChars = (cp0 + offset) - cp;
        contentLen += numChars;
      }
      else {
        while (cp >= end) {
          PRUnichar ch = PRUnichar(*cp);
          if (!XP_IS_SPACE(ch)) {
            if (CH_NBSP == ch) ch = ' ';
            if (ch > MAX_UNIBYTE) mHasMultibyte = PR_TRUE;
            cp--;

            // Store character in buffer; grow buffer if we have to
            NS_ASSERTION(bp > bufEnd, "whoops");
            *bp-- = ch;
            if (bp == bufEnd) {
              PRInt32 delta = (&(mBuffer[mBufferLength])) - bp - 1;
              if (!GrowBuffer(PR_FALSE)) {
                goto done;
              }
              bp = (&(mBuffer[mBufferLength])) - delta - 1;
              bufEnd = mBuffer;
            }
            continue;
          }
          numChars = (cp0 + offset) - cp;
          wordLen += numChars;
          contentLen += numChars;
          mCurrentFragOffset -= numChars;
          goto done;
        }
        numChars = (cp0 + offset) - cp;
        wordLen += numChars;
        contentLen += numChars;
      }
    }

    // Step back to the previous text fragment, if there is one
    if (frag != lastFrag)
    {
      frag--;
      mCurrentFrag = frag;
      mCurrentFragOffset = mCurrentFrag->GetLength()-1;
      offset = mCurrentFragOffset;
    }
    else
      mCurrentFragOffset = 0;
  }
  while (frag > lastFrag);

 done:;
  // Apply text-transform to real words only. The constructor merely
  // asserts that the case-conversion service was obtained, so skip the
  // transform rather than dereference a null service.
  if (!isWhitespace && (nsnull != gCaseConv))
  {
    switch(mTextTransform)
    {
      case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
        gCaseConv->ToTitle(&(mBuffer[mBufferLength-wordLen]),
                           &(mBuffer[mBufferLength-wordLen]),
                           wordLen, !aInWord);
        break;
      case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
        gCaseConv->ToLower(&(mBuffer[mBufferLength-wordLen]),
                           &(mBuffer[mBufferLength-wordLen]),
                           wordLen );
        break;
      case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
        gCaseConv->ToUpper(&(mBuffer[mBufferLength-wordLen]),
                           &(mBuffer[mBufferLength-wordLen]),
                           wordLen );
        break;
      default:
        break;
    }
  }

 really_done:;
  mOffset -= contentLen;
  NS_ASSERTION(mOffset >= 0, "whoops");
  aWordLenResult = wordLen;
  aContentLenResult = contentLen;
  aIsWhitespaceResult = isWhitespace;

#ifdef DEBUG_GETPREVWORD
  {
    // Debug aid: dump the word that is about to be returned.
    printf(aIsWhitespaceResult ? "#1 WHITESPACE\n": "NOT WHITESPACE\n");
    if(! aIsWhitespaceResult)
    {
      PRUnichar* wordBufMem = &(mBuffer[mBufferLength-wordLen]);
      PRInt32 ax;
      for(ax=0; ax<wordLen; ax++) {
        if(wordBufMem[ax] < 0x080)
          printf("%c", (char)wordBufMem[ax]);
        else
          printf("[U+%04X]", wordBufMem[ax]);
      }
      printf("(%d)\n",wordLen);
    }
  }
#endif

  // The word occupies the last wordLen slots of the buffer.
  return &(mBuffer[mBufferLength-wordLen]);
}