2001-09-29 00:14:13 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
1998-10-20 04:17:17 +04:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* The contents of this file are subject to the Netscape Public License
|
|
|
|
* Version 1.1 (the "License"); you may not use this file except in
|
|
|
|
* compliance with the License. You may obtain a copy of the License at
|
|
|
|
* http://www.mozilla.org/NPL/
|
1998-10-20 04:17:17 +04:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
|
|
* for the specific language governing rights and limitations under the
|
|
|
|
* License.
|
1998-10-20 04:17:17 +04:00
|
|
|
*
|
|
|
|
* The Original Code is Mozilla Communicator client code.
|
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* The Initial Developer of the Original Code is
|
|
|
|
* Netscape Communications Corporation.
|
|
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
|
|
* the Initial Developer. All Rights Reserved.
|
1999-11-06 06:40:37 +03:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* Contributor(s):
|
|
|
|
*
|
|
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
|
|
* use your version of this file under the terms of the NPL, indicate your
|
|
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
|
|
* the provisions above, a recipient may use your version of this file under
|
|
|
|
* the terms of any one of the NPL, the GPL or the LGPL.
|
|
|
|
*
|
|
|
|
* ***** END LICENSE BLOCK ***** */
|
2000-04-12 19:52:50 +04:00
|
|
|
#include <ctype.h>
|
1999-10-20 03:01:45 +04:00
|
|
|
#include "nsCOMPtr.h"
|
1998-10-20 04:17:17 +04:00
|
|
|
#include "nsTextTransformer.h"
|
|
|
|
#include "nsIContent.h"
|
|
|
|
#include "nsIFrame.h"
|
|
|
|
#include "nsIStyleContext.h"
|
|
|
|
#include "nsITextContent.h"
|
|
|
|
#include "nsStyleConsts.h"
|
1999-02-24 21:21:23 +03:00
|
|
|
#include "nsILineBreaker.h"
|
1999-04-07 02:41:44 +04:00
|
|
|
#include "nsIWordBreaker.h"
|
1999-02-23 05:27:54 +03:00
|
|
|
#include "nsIServiceManager.h"
|
|
|
|
#include "nsUnicharUtilCIID.h"
|
|
|
|
#include "nsICaseConversion.h"
|
1999-10-20 03:01:45 +04:00
|
|
|
#include "prenv.h"
|
2001-10-19 18:10:22 +04:00
|
|
|
#include "nsIPref.h"
|
2002-06-12 01:00:20 +04:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
#include "nsLayoutAtoms.h"
|
|
|
|
#endif
|
2001-10-19 18:10:22 +04:00
|
|
|
|
|
|
|
|
|
|
|
PRPackedBool nsTextTransformer::sWordSelectPrefInited = PR_FALSE;
|
|
|
|
PRPackedBool nsTextTransformer::sWordSelectStopAtPunctuation = PR_FALSE;
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
|
|
|
|
// A new buffer points at its own mAutoBuffer storage and reports that
// storage's capacity (NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE, counted in
// PRUnichar units).  The destructor never delete []s mAutoBuffer.
nsAutoTextBuffer::nsAutoTextBuffer()
  : mBuffer(mAutoBuffer)
  , mBufferLen(NS_TEXT_TRANSFORMER_AUTO_WORD_BUF_SIZE)
{
  // All state is established by the initializer list.
}
|
|
|
|
|
|
|
|
// Frees the buffer only when it is a separate allocation; mAutoBuffer is
// part of this object and must never be passed to delete [].
nsAutoTextBuffer::~nsAutoTextBuffer()
{
  const PRBool ownsSeparateBlock = mBuffer && (mBuffer != mAutoBuffer);
  if (!ownsSeparateBlock) {
    return;
  }
  delete [] mBuffer;
}
|
|
|
|
|
|
|
|
// Enlarge the buffer by at least aAtLeast PRUnichars.
//
// The preferred new capacity is twice the current one; when doubling would
// not cover the request, the capacity becomes current + aAtLeast + 100.
// aCopyToHead is forwarded to GrowTo(), whose result is returned.
nsresult
nsAutoTextBuffer::GrowBy(PRInt32 aAtLeast, PRBool aCopyToHead)
{
  const PRInt32 minimumSize = mBufferLen + aAtLeast;
  const PRInt32 doubledSize = mBufferLen * 2;
  const PRInt32 targetSize =
    (doubledSize < minimumSize) ? (minimumSize + 100) : doubledSize;
  return GrowTo(targetSize, aCopyToHead);
}
|
|
|
|
|
|
|
|
// Make sure the buffer can hold at least aNewSize PRUnichars.
//
// Nothing happens (and NS_OK is returned) when the buffer is already that
// large.  Otherwise a new block is allocated, the whole old buffer
// (mBufferLen PRUnichars) is copied into it, the old block is freed unless
// it is the built-in mAutoBuffer, and mBuffer/mBufferLen are updated.
//
//   aCopyToHead != 0 : old contents are placed at the start of the new block.
//   aCopyToHead == 0 : old contents are placed at the END of the new block,
//                      i.e. they finish exactly at mBuffer + aNewSize.
//
// Returns NS_ERROR_OUT_OF_MEMORY if the allocation yields a null pointer.
nsresult
nsAutoTextBuffer::GrowTo(PRInt32 aNewSize, PRBool aCopyToHead)
{
  if (aNewSize <= mBufferLen) {
    // Already big enough.
    return NS_OK;
  }

  PRUnichar* newBuffer = new PRUnichar[aNewSize];
  if (!newBuffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // For the tail copy the destination must be (aNewSize - mBufferLen), not
  // mBufferLen: using mBufferLen writes past the end of newBuffer whenever
  // aNewSize < 2 * mBufferLen, and leaves the data short of the end whenever
  // aNewSize > 2 * mBufferLen.  The two offsets coincide when the size is
  // exactly doubled, so that case is unchanged.
  // NOTE(review): callers that pass aCopyToHead == PR_FALSE are not visible
  // here; this assumes they locate the old data relative to the buffer end.
  const PRInt32 destIndex = aCopyToHead ? 0 : (aNewSize - mBufferLen);
  memcpy(&newBuffer[destIndex], mBuffer, sizeof(PRUnichar) * mBufferLen);

  if (mBuffer != mAutoBuffer) {
    delete [] mBuffer;
  }
  mBuffer = newBuffer;
  mBufferLen = aNewSize;
  return NS_OK;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
1999-02-23 05:27:54 +03:00
|
|
|
|
2001-10-25 05:08:40 +04:00
|
|
|
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
|
1999-10-20 03:01:45 +04:00
|
|
|
|
1999-02-23 05:27:54 +03:00
|
|
|
static nsICaseConversion* gCaseConv = nsnull;
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// One-time setup of the class-wide word-selection preference.
//
// The first call reads "layout.word_select.stop_at_punctuation" from the
// pref service (when that service is available) into
// sWordSelectStopAtPunctuation and marks the preference as initialized;
// later calls do nothing.  Always returns NS_OK.
nsresult
nsTextTransformer::Initialize()
{
  if (sWordSelectPrefInited) {
    return NS_OK;
  }

  nsCOMPtr<nsIPref> prefs(do_GetService(NS_PREF_CONTRACTID));
  if (prefs) {
    // Stays PR_FALSE if the lookup does not store a value.
    PRBool stopAtPunctuation = PR_FALSE;
    prefs->GetBoolPref("layout.word_select.stop_at_punctuation",
                       &stopAtPunctuation);
    sWordSelectStopAtPunctuation = stopAtPunctuation;
  }

  // Marked as initialized even when the pref service was unavailable, so
  // the lookup is attempted only once.
  sWordSelectPrefInited = PR_TRUE;
  return NS_OK;
}
|
|
|
|
// Lazily acquire the case-conversion service into gCaseConv.
//
// Returns NS_OK immediately when the service is already cached; otherwise
// returns whatever nsServiceManager::GetService() reported.  Failures are
// asserted on but still returned to the caller.
static nsresult EnsureCaseConv()
{
  if (gCaseConv) {
    return NS_OK;
  }

  nsresult res = nsServiceManager::GetService(kUnicharUtilCID,
                                              NS_GET_IID(nsICaseConversion),
                                              (nsISupports**)&gCaseConv);
  NS_ASSERTION( NS_SUCCEEDED(res), "cannot get UnicharUtil");
  NS_ASSERTION( gCaseConv != NULL, "cannot get UnicharUtil");
  return res;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
nsTextTransformer::Shutdown()
|
|
|
|
{
|
|
|
|
if (gCaseConv) {
|
|
|
|
nsServiceManager::ReleaseService(kUnicharUtilCID, gCaseConv);
|
|
|
|
gCaseConv = nsnull;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-12-13 00:58:13 +03:00
|
|
|
// For now, we have only a couple of characters to strip out. If we get
|
1999-10-26 03:04:51 +04:00
|
|
|
// any more, change this to use a bitset to lookup into.
|
2000-04-12 18:54:43 +04:00
|
|
|
// CH_SHY - soft hyphen (discretionary hyphen)
|
2001-11-14 17:21:52 +03:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
// added BIDI formatting codes
|
|
|
|
#define IS_DISCARDED(_ch) \
|
|
|
|
(((_ch) == CH_SHY) || ((_ch) == '\r') || IS_BIDI_CONTROL(_ch))
|
|
|
|
#else
|
1999-10-26 03:04:51 +04:00
|
|
|
#define IS_DISCARDED(_ch) \
|
2000-12-13 00:58:13 +03:00
|
|
|
(((_ch) == CH_SHY) || ((_ch) == '\r'))
|
2001-11-14 17:21:52 +03:00
|
|
|
#endif
|
|
|
|
|
1998-10-20 04:17:17 +04:00
|
|
|
|
|
|
|
#define MAX_UNIBYTE 127
|
|
|
|
|
2001-01-28 02:06:33 +03:00
|
|
|
MOZ_DECL_CTOR_COUNTER(nsTextTransformer)
|
1999-10-09 00:41:19 +04:00
|
|
|
|
1999-09-22 04:40:16 +04:00
|
|
|
// Construct a transformer around the given breakers.
//
//   aLineBreaker / aWordBreaker  stored for later break lookups; supplying
//                                neither trips an assertion.
//   aPresContext                 queried for the language-specific transform
//                                type; with IBMBIDI it is also remembered in
//                                mPresContext.  It is dereferenced
//                                unconditionally.
//
// In DEBUG builds the first construction also runs SelfTest() once.
nsTextTransformer::nsTextTransformer(nsILineBreaker* aLineBreaker,
                                     nsIWordBreaker* aWordBreaker,
                                     nsIPresContext* aPresContext)
  : mFrag(nsnull),
    mOffset(0),
    mMode(eNormal),
    mLineBreaker(aLineBreaker),
    mWordBreaker(aWordBreaker),
    mBufferPos(0),
    mTextTransform(NS_STYLE_TEXT_TRANSFORM_NONE),
    mFlags(0)
{
  MOZ_COUNT_CTOR(nsTextTransformer);

  aPresContext->GetLanguageSpecificTransformType(&mLanguageSpecificTransformType);

#ifdef IBMBIDI
  mPresContext = aPresContext;
#endif

  // At least one breaker is required.
  if (!aLineBreaker && !aWordBreaker) {
    NS_ASSERTION(0, "invalid creation of nsTextTransformer");
  }

#ifdef DEBUG
  // The flag is cleared before SelfTest() is invoked.
  static PRBool selfTestPending = PR_TRUE;
  if (selfTestPending) {
    selfTestPending = PR_FALSE;
    SelfTest(aLineBreaker, aWordBreaker, aPresContext);
  }
#endif
}
|
|
|
|
|
|
|
|
// The only explicit work is the MOZ_COUNT_DTOR bookkeeping that pairs with
// MOZ_COUNT_CTOR in the constructor.
nsTextTransformer::~nsTextTransformer()
{
  MOZ_COUNT_DTOR(nsTextTransformer);
}
|
|
|
|
|
|
|
|
// Prepare the transformer to scan the text of aContent as styled by aFrame.
//
//   aFrame           supplies the text style (white-space mode and
//                    text-transform) and, with IBMBIDI, the bidi char type.
//   aContent         must support nsITextContent; otherwise nothing below
//                    the bidi setup runs and the QueryInterface result is
//                    returned.
//   aStartingOffset  initial scan offset; clamped into [0, fragment length]
//                    with a warning when out of range.
//   aLeaveAsAscii    request to keep the output as 1-byte text.  It is only
//                    honoured for 1-byte fragments in normal white-space
//                    mode with no language-specific transform.
//
// Returns the result of the nsITextContent QueryInterface.
nsresult
nsTextTransformer::Init(nsIFrame* aFrame,
                        nsIContent* aContent,
                        PRInt32 aStartingOffset,
                        PRBool aLeaveAsAscii)
{
#ifdef IBMBIDI
  PRBool bidiEnabled;
  mPresContext->GetBidiEnabled(&bidiEnabled);
  if (bidiEnabled) {
    aFrame->GetBidiProperty(mPresContext, nsLayoutAtoms::charType,
                            (void**)&mCharType, sizeof(mCharType));

    PRBool isBidiSystem;
    mPresContext->GetIsBidiSystem(isBidiSystem);

    // Arabic shaping is requested only when the platform is not itself a
    // bidi system.
    const PRBool shapeArabic =
      (eCharType_RightToLeftArabic == mCharType) && !isBidiSystem;
    if (shapeArabic) {
      SetNeedsArabicShaping(PR_TRUE);
    }
    SetNeedsNumericShaping(PR_TRUE);
  }
#endif

  // Fetch the content's text fragment.
  nsresult rv;
  nsCOMPtr<nsITextContent> textContent = do_QueryInterface(aContent, &rv);
  if (!textContent.get()) {
    return rv;
  }
  textContent->GetText(&mFrag);

  // Clamp the starting offset into the fragment.
  if (aStartingOffset < 0) {
    NS_WARNING("bad starting offset");
    aStartingOffset = 0;
  }
  else if (aStartingOffset > mFrag->GetLength()) {
    NS_WARNING("bad starting offset");
    aStartingOffset = mFrag->GetLength();
  }
  mOffset = aStartingOffset;

  // Pick up the frame's text style.
  const nsStyleText* textStyle;
  aFrame->GetStyleData(eStyleStruct_Text, (const nsStyleStruct*&) textStyle);
  if (textStyle->mWhiteSpace == NS_STYLE_WHITESPACE_PRE) {
    mMode = ePreformatted;
  }
  else if (textStyle->mWhiteSpace == NS_STYLE_WHITESPACE_MOZ_PRE_WRAP) {
    mMode = ePreWrap;
  }
  mTextTransform = textStyle->mTextTransform;

  if (!aLeaveAsAscii) {
    SetLeaveAsAscii(PR_FALSE);
  }
  else {
    SetLeaveAsAscii(PR_TRUE);
    // XXX Currently text is only left as ascii for normal text, not for
    // preformatted or preformatted-wrapped text or language-specific
    // transforms, and never stepped down from Unicode to ascii.
    const PRBool mustBeUnicode =
      mFrag->Is2b() ||
      (eNormal != mMode) ||
      (mLanguageSpecificTransformType != eLanguageSpecificTransformType_None);
    if (mustBeUnicode) {
      SetLeaveAsAscii(PR_FALSE);
    }
  }

  return rv;
}
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
|
|
// Scan forward from mOffset over a run of collapsible white space and emit
// a single ' ' into the transform buffer for the whole run.
//
// Discarded characters (IS_DISCARDED) inside the run are skipped along with
// the white space so that they collapse with it.  The space is written as a
// byte when the transformed text is still ascii, otherwise as a PRUnichar,
// and mBufferPos advances by one.
//
// Returns the content offset of the first character after the run.  If the
// transform buffer is full and cannot be grown, no space is written and
// mBufferPos is left unchanged.
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_F()
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
  PRInt32 offset = mOffset;

  for (; offset < fragLen; offset++) {
    PRUnichar ch = frag->CharAt(offset);
    // Stop at the first character that is neither white space nor
    // discardable.
    if (!XP_IS_SPACE(ch) && !IS_DISCARDED(ch)) {
      break;
    }
  }

  // Make sure there is room in the transform buffer.  The result of GrowBy
  // must be checked: storing at mBufferPos after a failed grow would write
  // past the end of the buffer.  The other scanners in this file truncate
  // their output on allocation failure; this does the same.
  if (mBufferPos >= mTransformBuf.mBufferLen) {
    nsresult rv = mTransformBuf.GrowBy(128);
    if (NS_FAILED(rv)) {
      return offset;
    }
  }

  if (TransformedTextIsAscii()) {
    unsigned char* bp = (unsigned char*)mTransformBuf.mBuffer;
    bp[mBufferPos++] = ' ';
  } else {
    mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
  }
  return offset;
}
|
2000-04-12 18:54:43 +04:00
|
|
|
|
|
|
|
void
|
|
|
|
nsTextTransformer::ConvertTransformedTextToUnicode()
|
|
|
|
{
|
|
|
|
// Go backwards over the characters and convert them.
|
|
|
|
PRInt32 lastChar = mBufferPos - 1;
|
|
|
|
unsigned char* cp1 = (unsigned char*)mTransformBuf.mBuffer + lastChar;
|
|
|
|
PRUnichar* cp2 = mTransformBuf.mBuffer + lastChar;
|
|
|
|
|
|
|
|
NS_ASSERTION(mTransformBuf.mBufferLen >= mBufferPos,
|
|
|
|
"transform buffer is too small");
|
|
|
|
for (PRInt32 count = mBufferPos; count > 0; count--) {
|
|
|
|
*cp2-- = PRUnichar(*cp1--);
|
|
|
|
}
|
|
|
|
}
|
1998-10-20 04:17:17 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
// Scan forward from mOffset through a 1-byte fragment, appending one word
// (everything up to the next white-space character) to the transform buffer.
//
//   - CH_NBSP is emitted as ' ' and flags *aWasTransformed.
//   - Discarded characters (IS_DISCARDED) are dropped from the output.
//   - The first character above MAX_UNIBYTE switches the transformed text
//     from ascii to Unicode: already-buffered bytes are widened in place and
//     *aWasTransformed is set.
//   - If the buffer cannot be grown the word is truncated at that point.
//
// On return *aWordLen is the number of characters appended; the return value
// is the content offset at which scanning stopped.
PRInt32
nsTextTransformer::ScanNormalAsciiText_F(PRInt32* aWordLen,
                                         PRBool* aWasTransformed)
{
  const nsTextFragment* textFrag = mFrag;
  const PRInt32 textLen = textFrag->GetLength();
  const PRInt32 firstBufferPos = mBufferPos;
  const unsigned char* text = (const unsigned char*)textFrag->Get1b();

  PRInt32 pos = mOffset;
  for (; pos < textLen; pos++) {
    unsigned char ch = text[pos];
    if (XP_IS_SPACE(ch)) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      // Strip discarded characters from the transformed output
      continue;
    }

    if (ch > MAX_UNIBYTE) {
      // The text has a multibyte character, so the output can no longer be
      // left as ascii.
      SetHasMultibyte(PR_TRUE);

      if (TransformedTextIsAscii()) {
        SetTransformedTextIsAscii(PR_FALSE);
        *aWasTransformed = PR_TRUE;

        // Widen whatever ascii output has been produced so far.
        if (mBufferPos > 0) {
          ConvertTransformedTextToUnicode();
        }
      }
    }

    if (mBufferPos >= mTransformBuf.mBufferLen) {
      nsresult rv = mTransformBuf.GrowBy(128);
      if (NS_FAILED(rv)) {
        // Out of space: just truncate the text
        break;
      }
    }

    // The write position is always element mBufferPos of the buffer, in
    // bytes while the output is ascii and in PRUnichars once it is not.
    if (TransformedTextIsAscii()) {
      ((unsigned char*)mTransformBuf.GetBuffer())[mBufferPos] = ch;
    } else {
      mTransformBuf.GetBuffer()[mBufferPos] = PRUnichar(ch);
    }
    mBufferPos++;
  }

  *aWordLen = mBufferPos - firstBufferPos;
  return pos;
}
|
|
|
|
|
2000-08-25 01:15:19 +04:00
|
|
|
// Word-break flavour of the forward ascii scan: appends one word from a
// 1-byte fragment, starting at mOffset, to the transform buffer.
//
// Differences from the plain forward scan visible in this function:
//   - CH_NBSP ends the word.  When it is the very first character it is
//     emitted (as ' ', with *aWasTransformed set) and forms a word of its
//     own; anywhere else the scan stops in front of it.
//   - When sWordSelectStopAtPunctuation is set, any non-alphanumeric
//     character ends the word.
//
// Discarded characters are dropped, a character above MAX_UNIBYTE switches
// the output to Unicode, and a failed buffer grow truncates the word.
//
// On return *aWordLen is the number of characters appended; the return value
// is the content offset at which scanning stopped.
PRInt32
nsTextTransformer::ScanNormalAsciiText_F_ForWordBreak(PRInt32* aWordLen,
                                                      PRBool* aWasTransformed)
{
  const nsTextFragment* textFrag = mFrag;
  const PRInt32 textLen = textFrag->GetLength();
  const PRInt32 firstBufferPos = mBufferPos;
  const unsigned char* text = (const unsigned char*)textFrag->Get1b();

  PRBool stopAfterThisChar = PR_FALSE;
  PRInt32 pos = mOffset;
  for (; pos < textLen && !stopAfterThisChar; pos++) {
    unsigned char ch = text[pos];
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
      if (pos != mOffset) {
        break;
      }
      // Leading nbsp: emit it, then stop.
      stopAfterThisChar = PR_TRUE;
    }
    else if (XP_IS_SPACE(ch)) {
      break;
    }
    else if (sWordSelectStopAtPunctuation && !isalnum(ch)) {
      // on some platforms, punctuation breaks words too.
      break;
    }
    else if (IS_DISCARDED(ch)) {
      // Strip discarded characters from the transformed output
      continue;
    }

    if (ch > MAX_UNIBYTE) {
      // The text has a multibyte character, so the output can no longer be
      // left as ascii.
      SetHasMultibyte(PR_TRUE);

      if (TransformedTextIsAscii()) {
        SetTransformedTextIsAscii(PR_FALSE);
        *aWasTransformed = PR_TRUE;

        // Widen whatever ascii output has been produced so far.
        if (mBufferPos > 0) {
          ConvertTransformedTextToUnicode();
        }
      }
    }

    if (mBufferPos >= mTransformBuf.mBufferLen) {
      nsresult rv = mTransformBuf.GrowBy(128);
      if (NS_FAILED(rv)) {
        // Out of space: just truncate the text
        break;
      }
    }

    // The write position is always element mBufferPos of the buffer, in
    // bytes while the output is ascii and in PRUnichars once it is not.
    if (TransformedTextIsAscii()) {
      ((unsigned char*)mTransformBuf.GetBuffer())[mBufferPos] = ch;
    } else {
      mTransformBuf.GetBuffer()[mBufferPos] = PRUnichar(ch);
    }
    mBufferPos++;
  }

  *aWordLen = mBufferPos - firstBufferPos;
  return pos;
}
|
|
|
|
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
// Scan forward from mOffset through a 2-byte fragment and append one word
// (or one line-break segment when aForLineBreak is set) to the transform
// buffer.
//
//   aForLineBreak    selects mLineBreaker instead of mWordBreaker.
//   aWordLen         out: number of characters appended.  With IBMBIDI the
//                    incoming value, when positive and smaller than the
//                    fragment length, also limits how far the scan may go.
//   aWasTransformed  set when the first character was an nbsp turned into a
//                    space.
//
// The first character is handed to BreakInBetween() untouched and only then
// transformed, so the breaker still sees an nbsp as non-breaking (bug 14280).
// If the buffer cannot hold even the first character, *aWordLen is 0 and the
// offset of that character is returned.  Otherwise the return value is the
// content offset just past the scanned text.
PRInt32
nsTextTransformer::ScanNormalUnicodeText_F(PRBool aForLineBreak,
                                           PRInt32* aWordLen,
                                           PRBool* aWasTransformed)
{
  const nsTextFragment* textFrag = mFrag;
  const PRUnichar* text = textFrag->Get2b();
  PRInt32 limit = textFrag->GetLength();
#ifdef IBMBIDI
  if (*aWordLen > 0 && *aWordLen < limit) {
    limit = *aWordLen;
  }
#endif
  PRInt32 pos = mOffset;

  PRUnichar firstChar = textFrag->CharAt(pos++);

#ifdef IBMBIDI
  // Need to strip BIDI controls even when those are 'firstChars'.
  // This doesn't seem to produce bug 14280 (or similar bugs).
  while (pos < limit && IS_BIDI_CONTROL(firstChar)) {
    firstChar = textFrag->CharAt(pos++);
  }
#endif // IBMBIDI

  if (firstChar > MAX_UNIBYTE) {
    SetHasMultibyte(PR_TRUE);
  }

  // The breakers are consulted only when something follows the first
  // character.
  PRInt32 wordLen = 1;
  const PRBool moreToScan = (pos < limit);
  const PRUnichar* rest = nsnull;
  PRBool breakBetween = PR_FALSE;
  if (moreToScan) {
    rest = text + pos;
    if (aForLineBreak) {
      mLineBreaker->BreakInBetween(&firstChar, 1, rest, (limit-pos), &breakBetween);
    }
    else {
      mWordBreaker->BreakInBetween(&firstChar, 1, rest, (limit-pos), &breakBetween);
    }
  }

  // Transform and store the first character.  This has to happen after
  // BreakInBetween (see bug 14280) and is the same whether or not the
  // breaker was consulted.
  if (CH_NBSP == firstChar) {
    firstChar = ' ';
    *aWasTransformed = PR_TRUE;
  }
  nsresult firstRv = mTransformBuf.GrowTo(mBufferPos + 1);
  if (NS_FAILED(firstRv)) {
    *aWordLen = 0;
    return pos - 1;
  }
  mTransformBuf.mBuffer[mBufferPos++] = firstChar;

  if (moreToScan && !breakBetween) {
    // Find next position
    PRBool tryNextFrag;
    PRUint32 next;
    if (aForLineBreak) {
      mLineBreaker->Next(text, limit, pos, &next, &tryNextFrag);
    }
    else {
      mWordBreaker->Next(text, limit, pos, &next, &tryNextFrag);
    }
    wordLen = (PRInt32) (next - (PRUint32) pos) + 1;

    // The character count is known, so grow the buffer once before copying;
    // if that fails, copy only as much as still fits.
    nsresult growRv = mTransformBuf.GrowTo(mBufferPos + wordLen);
    if (NS_FAILED(growRv)) {
      wordLen = mTransformBuf.GetBufferLength() - mBufferPos;
    }

    pos += wordLen - 1;

    // Copy the remaining characters:
    //   1. convert nbsp into space
    //   2. drop discarded characters (and bare LF / CR)
    //   3. note any multibyte character
    //   4. append to the buffer
    PRUnichar* out = &mTransformBuf.mBuffer[mBufferPos];
    const PRUnichar* stop = rest + wordLen - 1;
    while (rest < stop) {
      PRUnichar ch = *rest++;
      if (CH_NBSP == ch) {
        ch = ' ';
      }
      else if (IS_DISCARDED(ch) || (ch == 0x0a) || (ch == 0x0d)) {
        // Strip discarded characters from the transformed output
        wordLen--;
        continue;
      }
      if (ch > MAX_UNIBYTE) {
        SetHasMultibyte(PR_TRUE);
      }
      *out++ = ch;
      mBufferPos++;
    }
  }

  *aWordLen = wordLen;
  return pos;
}
|
|
|
|
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
|
|
/**
 * Forward scan over a run of whitespace in pre-wrap mode.
 *
 * Starting at mOffset, every character for which XP_IS_SPACE is true and
 * that is neither a tab nor a newline is appended to the transform buffer
 * as a plain ' '.  Discarded characters (IS_DISCARDED) are stepped over
 * without producing output.  The scan stops at the first other character,
 * at the end of the fragment, or when the transform buffer cannot grow.
 *
 * @param aWordLen  out: number of characters appended to the transform
 *                  buffer by this call
 * @return the content offset at which the scan stopped
 */
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_F(PRInt32* aWordLen)
{
  const nsTextFragment* const text = mFrag;
  const PRInt32 textLen = text->GetLength();
  const PRInt32 firstOutPos = mBufferPos;
  PRUnichar* out = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* outEnd = mTransformBuf.GetBufferEnd();
  PRInt32 pos = mOffset;

  for (; pos < textLen; ++pos) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    const PRUnichar c = text->CharAt(pos);

    if (!(XP_IS_SPACE(c) && ('\t' != c) && ('\n' != c))) {
      if (IS_DISCARDED(c)) {
        // Discarded characters do not end the run; just skip them
        continue;
      }
      break;
    }

    if (out == outEnd) {
      // Buffer is full: remember how much is used, grow, then re-derive
      // the write pointers because the storage may have moved
      const PRInt32 used = out - mTransformBuf.GetBuffer();
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBuffer() + used;
      outEnd = mTransformBuf.GetBufferEnd();
    }

    *out++ = ' ';
    ++mBufferPos;
  }

  *aWordLen = mBufferPos - firstOutPos;
  return pos;
}
|
|
|
|
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
/**
 * Forward scan over preformatted data, reading the fragment one character
 * at a time through CharAt().
 *
 * Characters from mOffset onwards are copied into the transform buffer
 * until a tab or newline is reached, the fragment ends, or the buffer
 * cannot grow.  A non-breaking space is copied as ' ' (and reported through
 * aWasTransformed); discarded characters are skipped.  Any copied character
 * above MAX_UNIBYTE marks the text as multibyte.
 *
 * @param aWordLen         out: number of characters appended to the buffer
 * @param aWasTransformed  out: set to PR_TRUE when an nbsp was replaced;
 *                         never cleared here
 * @return the content offset at which the scan stopped
 */
PRInt32
nsTextTransformer::ScanPreData_F(PRInt32* aWordLen,
                                 PRBool* aWasTransformed)
{
  const nsTextFragment* const text = mFrag;
  const PRInt32 textLen = text->GetLength();
  const PRInt32 firstOutPos = mBufferPos;
  PRUnichar* out = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* outEnd = mTransformBuf.GetBufferEnd();
  PRInt32 pos = mOffset;

  for (; pos < textLen; ++pos) {
    // This function is used for both Unicode and ascii strings so don't
    // make any assumptions about what kind of data it is
    PRUnichar c = text->CharAt(pos);

    if (('\n' == c) || ('\t' == c)) {
      // Tabs and newlines are left for the caller to handle
      break;
    }

    if (CH_NBSP == c) {
      c = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(c)) {
      continue;
    }

    if (c > MAX_UNIBYTE) {
      SetHasMultibyte(PR_TRUE);
    }

    if (out == outEnd) {
      // Buffer is full: grow it and re-derive the write pointers
      const PRInt32 used = out - mTransformBuf.GetBuffer();
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBuffer() + used;
      outEnd = mTransformBuf.GetBufferEnd();
    }

    *out++ = c;
    ++mBufferPos;
  }

  *aWordLen = mBufferPos - firstOutPos;
  return pos;
}
|
1999-10-16 03:36:07 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
/**
 * Forward scan over preformatted data held in a one-byte fragment.
 *
 * Bytes from mOffset onwards are widened to PRUnichar and copied into the
 * transform buffer until a tab or newline is reached, the fragment ends,
 * or the buffer cannot grow.  A non-breaking space is copied as ' ' (and
 * reported through aWasTransformed); discarded characters are skipped.
 * Any copied character above MAX_UNIBYTE marks the text as multibyte.
 *
 * The returned offset always names the first character that was NOT
 * consumed.  That includes the out-of-memory case: the character that could
 * not be stored is left for a later call instead of being silently dropped,
 * which matches ScanPreData_F.
 *
 * @param aWordLen         out: number of characters appended to the buffer
 * @param aWasTransformed  out: set to PR_TRUE when an nbsp was replaced;
 *                         never cleared here
 * @return the content offset at which the scan stopped
 */
PRInt32
nsTextTransformer::ScanPreAsciiData_F(PRInt32* aWordLen,
                                      PRBool* aWasTransformed)
{
  const nsTextFragment* frag = mFrag;
  PRUnichar* bp = mTransformBuf.GetBuffer() + mBufferPos;
  PRUnichar* endbp = mTransformBuf.GetBufferEnd();
  const unsigned char* start = (const unsigned char*) frag->Get1b();
  const unsigned char* end = start + frag->GetLength();
  const unsigned char* cp = start + mOffset;
  PRInt32 prevBufferPos = mBufferPos;

  // cp is advanced by the loop header only after the current character has
  // been fully dealt with (stored or deliberately discarded), so every
  // early exit leaves it pointing at the first unconsumed character.
  for (; cp < end; cp++) {
    PRUnichar ch = (PRUnichar) *cp;
    if ((ch == '\t') || (ch == '\n')) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
      *aWasTransformed = PR_TRUE;
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
    if (bp == endbp) {
      PRInt32 oldLength = bp - mTransformBuf.GetBuffer();
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input.
        // The current character has not been stored, so it must not be
        // reported as consumed.
        break;
      }
      bp = mTransformBuf.GetBuffer() + oldLength;
      endbp = mTransformBuf.GetBufferEnd();
    }
    *bp++ = ch;
    mBufferPos++;
  }

  *aWordLen = mBufferPos - prevBufferPos;
  return cp - start;
}
|
1999-10-16 03:36:07 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
//----------------------------------------
|
1999-10-16 03:36:07 +04:00
|
|
|
|
2000-04-12 18:54:43 +04:00
|
|
|
static void
|
|
|
|
AsciiToLowerCase(unsigned char* aText, PRInt32 aWordLen)
|
|
|
|
{
|
|
|
|
while (aWordLen-- > 0) {
|
|
|
|
*aText = tolower(*aText);
|
|
|
|
aText++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
AsciiToUpperCase(unsigned char* aText, PRInt32 aWordLen)
|
|
|
|
{
|
|
|
|
while (aWordLen-- > 0) {
|
|
|
|
*aText = toupper(*aText);
|
|
|
|
aText++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2000-05-10 23:44:09 +04:00
|
|
|
#define kSzlig 0x00DF
|
|
|
|
/**
 * Counts the occurrences of the German sharp s (kSzlig) in a run of text.
 *
 * @param aText  first character of the run
 * @param len    number of characters to examine
 * @return how many of those characters equal kSzlig (0 when len <= 0)
 */
static PRInt32 CountGermanSzlig(const PRUnichar* aText, PRInt32 len)
{
  PRInt32 total = 0;
  PRInt32 remaining = len;
  while (remaining-- > 0) {
    if (kSzlig == *aText++) {
      ++total;
    }
  }
  return total;
}
|
|
|
|
/**
 * Expands every German sharp s (kSzlig) in a run of text to "SS", in place.
 *
 * The text is rewritten from the back towards the front so that characters
 * are moved out of the way before their slots are overwritten.  The work
 * stops as soon as the read and write positions meet, i.e. once every
 * sharp s accounted for by szCnt has been expanded.
 *
 * @param aText  first character of the run; the storage must have room for
 *               len + szCnt characters
 * @param len    number of characters currently in the run
 * @param szCnt  number of kSzlig characters contained in the run
 */
static void ReplaceGermanSzligToSS(PRUnichar* aText, PRInt32 len, PRInt32 szCnt)
{
  PRInt32 readIdx = len - 1;
  PRInt32 writeIdx = readIdx + szCnt;

  while ((readIdx != writeIdx) && (readIdx >= 0)) {
    const PRUnichar c = aText[readIdx--];
    if (kSzlig == c) {
      aText[writeIdx--] = PRUnichar('S');
      aText[writeIdx--] = PRUnichar('S');
    }
    else {
      aText[writeIdx--] = c;
    }
  }
}
|
|
|
|
|
2000-09-21 03:00:32 +04:00
|
|
|
void
|
|
|
|
nsTextTransformer::LanguageSpecificTransform(PRUnichar* aText, PRInt32 aLen,
|
|
|
|
PRBool* aWasTransformed)
|
|
|
|
{
|
|
|
|
if (mLanguageSpecificTransformType ==
|
|
|
|
eLanguageSpecificTransformType_Japanese) {
|
|
|
|
for (PRInt32 i = 0; i < aLen; i++) {
|
|
|
|
if (aText[i] == 0x5C) { // BACKSLASH
|
|
|
|
aText[i] = 0xA5; // YEN SIGN
|
2000-11-01 01:06:59 +03:00
|
|
|
SetHasMultibyte(PR_TRUE);
|
2000-09-21 03:00:32 +04:00
|
|
|
*aWasTransformed = PR_TRUE;
|
|
|
|
}
|
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
* We considered doing this, but since some systems may not have fonts
|
|
|
|
* with this OVERLINE glyph, we decided not to do this.
|
|
|
|
*/
|
|
|
|
else if (aText[i] == 0x7E) { // TILDE
|
|
|
|
aText[i] = 0x203E; // OVERLINE
|
2000-11-01 01:06:59 +03:00
|
|
|
SetHasMultibyte(PR_TRUE);
|
2000-09-21 03:00:32 +04:00
|
|
|
*aWasTransformed = PR_TRUE;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
2001-10-11 04:22:13 +04:00
|
|
|
/* we once do transformation for Korean, but later decide to remove it */
|
|
|
|
/* see bug 88050 for more information */
|
2000-09-21 03:00:32 +04:00
|
|
|
}
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
/**
 * Scans forward from mOffset for the next "word" (a run of text or a run
 * of whitespace), stores its transformed form in the transform buffer and
 * advances mOffset past the content that was consumed.
 *
 * Leading discarded characters are skipped.  The first remaining character
 * and the current mode (normal, preformatted, pre-wrap) select which Scan*_F
 * helper produces the word.  Non-whitespace words then get the CSS
 * text-transform applied, including the expansion of the German sharp s to
 * "SS" for capitalize/uppercase, followed by the language-specific and
 * (IBMBIDI builds) Arabic/numeric shaping passes.
 *
 * @param aInWord             passed (negated) to the title-case converter
 *                            for text-transform: capitalize
 * @param aWordLenResult      out: length of the returned word.  In IBMBIDI
 *                            builds it is also read on entry: a positive
 *                            value smaller than the fragment length caps
 *                            the scan, and it seeds the Unicode scanner.
 * @param aContentLenResult   out: number of content characters consumed
 * @param aIsWhiteSpaceResult out: PR_TRUE when the word is whitespace
 * @param aWasTransformed     out: PR_TRUE when the word differs from the
 *                            content, when its length differs from the
 *                            content length, or when a text-transform is
 *                            in effect
 * @param aResetTransformBuf  when true the word is written at the start of
 *                            the transform buffer, otherwise it is appended
 *                            at the current buffer position
 * @param aForLineBreak       true when scanning for line breaking; when
 *                            false the mode is forced to normal
 * @return pointer to the word inside the transform buffer (one-byte
 *         characters when TransformedTextIsAscii()), or nsnull when no word
 *         was produced (end of fragment, or the buffer could not be
 *         enlarged for a directly stored character)
 */
PRUnichar*
nsTextTransformer::GetNextWord(PRBool aInWord,
                               PRInt32* aWordLenResult,
                               PRInt32* aContentLenResult,
                               PRBool* aIsWhiteSpaceResult,
                               PRBool* aWasTransformed,
                               PRBool aResetTransformBuf,
                               PRBool aForLineBreak)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 fragLen = frag->GetLength();
#ifdef IBMBIDI
  if (*aWordLenResult > 0 && *aWordLenResult < fragLen) {
    fragLen = *aWordLenResult;
  }
#endif
  PRInt32 offset = mOffset;
  PRInt32 wordLen = 0;
  PRBool isWhitespace = PR_FALSE;
  PRUnichar* result = nsnull;
  // This is a buffer index, not a flag, so it must be an integer type
  PRInt32 prevBufferPos;

  // Initialize OUT parameter
  *aWasTransformed = PR_FALSE;

  // See if we should reset the current buffer position back to the
  // beginning of the buffer
  if (aResetTransformBuf) {
    mBufferPos = 0;
    SetTransformedTextIsAscii(LeaveAsAscii());
  }
  prevBufferPos = mBufferPos;

  // Fix word breaking problem w/ PREFORMAT and PREWRAP
  // for word breaking, we should really go to the normal code
  if((! aForLineBreak) && (eNormal != mMode))
    mMode = eNormal;

  while (offset < fragLen) {
    PRUnichar firstChar = frag->CharAt(offset);

    // Eat up any discarded characters before dispatching
    if (IS_DISCARDED(firstChar)) {
      offset++;
      continue;
    }

    // Set when the transform buffer could not be enlarged for a character
    // that this function stores directly (rather than through one of the
    // Scan*_F helpers, which do their own growing)
    PRBool outOfRoom = PR_FALSE;

    switch (mMode) {
      default:
      case eNormal:
        if (XP_IS_SPACE(firstChar)) {
          offset = ScanNormalWhiteSpace_F();
          if (firstChar != ' ') {
            *aWasTransformed = PR_TRUE;
          }
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (CH_NBSP == firstChar && !aForLineBreak) {
          // Make sure we have enough room in the transform buffer
          if ((mBufferPos >= mTransformBuf.mBufferLen) &&
              NS_FAILED(mTransformBuf.GrowBy(128))) {
            outOfRoom = PR_TRUE;
            break;
          }

          wordLen = 1;
          isWhitespace = PR_TRUE;
          *aWasTransformed = PR_TRUE;

          offset++;
          if (TransformedTextIsAscii()) {
            ((unsigned char*)mTransformBuf.mBuffer)[mBufferPos++] = ' ';
          } else {
            mTransformBuf.mBuffer[mBufferPos++] = PRUnichar(' ');
          }
        }
        else if (frag->Is2b()) {
#ifdef IBMBIDI
          wordLen = *aWordLenResult;
#endif
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreformatted:
        if (('\n' == firstChar) || ('\t' == firstChar)) {
          // The buffer may be exactly full from a previous word, so make
          // room before storing the character
          if ((mBufferPos >= mTransformBuf.mBufferLen) &&
              NS_FAILED(mTransformBuf.GrowBy(128))) {
            outOfRoom = PR_TRUE;
            break;
          }
          mTransformBuf.mBuffer[mBufferPos++] = firstChar;
          offset++;
          wordLen = 1;
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
          offset = ScanPreData_F(&wordLen, aWasTransformed);
        }
        else {
          offset = ScanPreAsciiData_F(&wordLen, aWasTransformed);
        }
        break;

      case ePreWrap:
        if (XP_IS_SPACE(firstChar)) {
          if (('\n' == firstChar) || ('\t' == firstChar)) {
            // Same capacity concern as in the preformatted case above
            if ((mBufferPos >= mTransformBuf.mBufferLen) &&
                NS_FAILED(mTransformBuf.GrowBy(128))) {
              outOfRoom = PR_TRUE;
              break;
            }
            mTransformBuf.mBuffer[mBufferPos++] = firstChar;
            offset++;
            wordLen = 1;
          }
          else {
            offset = ScanPreWrapWhiteSpace_F(&wordLen);
          }
          isWhitespace = PR_TRUE;
        }
        else if (frag->Is2b()) {
#ifdef IBMBIDI
          wordLen = *aWordLenResult;
#endif
          offset = ScanNormalUnicodeText_F(aForLineBreak, &wordLen, aWasTransformed);
        }
        else {
          if (!aForLineBreak)
            offset = ScanNormalAsciiText_F_ForWordBreak(&wordLen, aWasTransformed);
          else
            offset = ScanNormalAsciiText_F(&wordLen, aWasTransformed);
        }
        break;
    }

    if (outOfRoom) {
      // Nothing was stored: report "no word" (result stays nsnull and
      // wordLen stays 0) and leave the character unconsumed
      break;
    }

    // The word pointer is computed only now because the scanning above may
    // have reallocated the transform buffer
    if (TransformedTextIsAscii()) {
      unsigned char* wordPtr = (unsigned char*)mTransformBuf.mBuffer + prevBufferPos;

      if (!isWhitespace) {
        switch (mTextTransform) {
        case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
          *wordPtr = toupper(*wordPtr);
          break;
        case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
          AsciiToLowerCase(wordPtr, wordLen);
          break;
        case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
          AsciiToUpperCase(wordPtr, wordLen);
          break;
        }
        NS_ASSERTION(mLanguageSpecificTransformType ==
                     eLanguageSpecificTransformType_None,
                     "should not be ASCII for language specific transforms");
      }
      result = (PRUnichar*)wordPtr;

    } else {
      result = &mTransformBuf.mBuffer[prevBufferPos];

      if (!isWhitespace) {
        // NOTE(review): the szlig expansions below lengthen the word but do
        // not advance mBufferPos -- confirm that callers passing
        // aResetTransformBuf == PR_FALSE tolerate that.
        switch (mTextTransform) {
        case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE:
          if(NS_SUCCEEDED(EnsureCaseConv()))
            gCaseConv->ToTitle(result, result, wordLen, !aInWord);
          // if the first character is szlig, expand it to "SS"
          if((wordLen > 0) && (kSzlig == *result))
          {
            PRBool haveRoom = PR_TRUE;
            if ((prevBufferPos + wordLen + 1) >= mTransformBuf.mBufferLen) {
              haveRoom = NS_SUCCEEDED(mTransformBuf.GrowBy(128));
              result = &mTransformBuf.mBuffer[prevBufferPos];
            }
            if (haveRoom) {
              // Shift everything after the first character one slot to the
              // right, starting from the last character of the word
              PRUnichar* src = result + wordLen - 1;
              while(src>result)
              {
                *(src+1) = *src;
                src--;
              }
              result[0] = PRUnichar('S');
              result[1] = PRUnichar('S');
              wordLen++;
            }
          }
          break;
        case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
          if(NS_SUCCEEDED(EnsureCaseConv()))
            gCaseConv->ToLower(result, result, wordLen);
          break;
        case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
          {
            if(NS_SUCCEEDED(EnsureCaseConv()))
              gCaseConv->ToUpper(result, result, wordLen);

            // Expand every German szlig still present to "SS"
            PRInt32 szligCnt = CountGermanSzlig(result, wordLen);
            if(szligCnt > 0) {
              PRBool haveRoom = PR_TRUE;
              // Make sure we have enough room in the transform buffer
              if ((prevBufferPos + wordLen + szligCnt) >= mTransformBuf.mBufferLen)
              {
                // A fixed 128 is too little when the word holds more
                // szligs than that
                PRInt32 extra = (szligCnt + 1 > 128) ? (szligCnt + 1) : 128;
                haveRoom = NS_SUCCEEDED(mTransformBuf.GrowBy(extra));
                result = &mTransformBuf.mBuffer[prevBufferPos];
              }
              if (haveRoom) {
                ReplaceGermanSzligToSS(result, wordLen, szligCnt);
                wordLen += szligCnt;
              }
            }
          }
          break;
        }
        if (mLanguageSpecificTransformType !=
            eLanguageSpecificTransformType_None) {
          LanguageSpecificTransform(result, wordLen, aWasTransformed);
        }
#ifdef IBMBIDI
        if (NeedsArabicShaping()) {
          DoArabicShaping(result, wordLen, aWasTransformed);
        }
        if (NeedsNumericShaping()) {
          DoNumericShaping(result, wordLen, aWasTransformed);
        }
#endif
      }
    }

    // Exactly one word is produced per call
    break;
  }

  *aWordLenResult = wordLen;
  *aContentLenResult = offset - mOffset;
  *aIsWhiteSpaceResult = isWhitespace;

  // If the word length doesn't match the content length then we transformed
  // the text
  if ((mTextTransform != NS_STYLE_TEXT_TRANSFORM_NONE) ||
      (*aWordLenResult != *aContentLenResult)) {
    *aWasTransformed = PR_TRUE;
  }

  mOffset = offset;
  return result;
}
|
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
|
|
|
// wordlen==1, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
|
|
/**
 * Backward scan over a run of whitespace in normal mode.
 *
 * Walks from mOffset - 1 towards the start of the fragment for as long as
 * the characters are whitespace (XP_IS_SPACE) or discarded (IS_DISCARDED),
 * so discarded characters collapse together with the surrounding spaces.
 * The whole run is represented by a single ' ' stored in the last slot of
 * the transform buffer.
 *
 * @return the offset of the character that ended the run, or -1 when the
 *         start of the fragment was reached
 */
PRInt32
nsTextTransformer::ScanNormalWhiteSpace_B()
{
  const nsTextFragment* const text = mFrag;
  PRInt32 pos = mOffset - 1;

  for (; pos >= 0; --pos) {
    const PRUnichar c = text->CharAt(pos);
    // Only a character that is neither whitespace nor discardable ends
    // the run
    if (!XP_IS_SPACE(c) && !IS_DISCARDED(c)) {
      break;
    }
  }

  mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
  return pos;
}
|
|
|
|
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
/**
 * Backward scan over a word, reading the fragment through CharAt().
 *
 * Walks from mOffset - 1 towards the start of the fragment and copies the
 * characters, last one first, into the tail of the transform buffer.  The
 * word ends at the first whitespace character (a non-breaking space counts
 * as a space here), at the start of the fragment, or when the buffer cannot
 * grow.  Discarded characters are skipped; any copied character above
 * MAX_UNIBYTE marks the text as multibyte.
 *
 * @param aWordLen  out: number of characters stored at the tail of the
 *                  transform buffer
 * @return the offset of the character that ended the word, or -1 when the
 *         start of the fragment was reached
 */
PRInt32
nsTextTransformer::ScanNormalAsciiText_B(PRInt32* aWordLen)
{
  const nsTextFragment* const text = mFrag;
  PRUnichar* out = mTransformBuf.GetBufferEnd();
  PRUnichar* outFloor = mTransformBuf.GetBuffer();
  PRInt32 pos = mOffset - 1;

  for (; pos >= 0; --pos) {
    PRUnichar c = text->CharAt(pos);
    if (CH_NBSP == c) {
      c = ' ';
    }
    if (XP_IS_SPACE(c)) {
      break;
    }
    if (IS_DISCARDED(c)) {
      continue;
    }
    if (c > MAX_UNIBYTE) {
      SetHasMultibyte(PR_TRUE);
    }

    if (out == outFloor) {
      // The buffer is completely full.
      // NOTE(review): the pointer fix-up below presumes that GrowBy leaves
      // the characters written so far at the tail of the enlarged buffer --
      // confirm against mTransformBuf's GrowBy implementation.
      const PRInt32 filled = mTransformBuf.mBufferLen;
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBufferEnd() - filled;
      outFloor = mTransformBuf.GetBuffer();
    }

    *--out = c;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - out;
  return pos;
}
|
|
|
|
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
/**
 * Backward scan over a word in a two-byte fragment, using the line breaker
 * (aForLineBreak true) or the word breaker (false) to find where the word
 * starts.
 *
 * The character at mOffset - 1 (in IBMBIDI builds: the nearest preceding
 * character that is not a bidi control, down to the limit) always becomes
 * the last character of the word.  Unless the breaker reports a break
 * directly at that position, the breaker's previous break position fixes
 * how many more characters belong to the word; they are copied into the
 * tail of the transform buffer with nbsp turned into ' ' and discarded
 * characters dropped.
 *
 * @param aForLineBreak  selects mLineBreaker (true) or mWordBreaker (false)
 * @param aWordLen       out: number of characters stored at the tail of
 *                       the transform buffer.  In IBMBIDI builds it is also
 *                       read on entry: a positive value is the lowest
 *                       offset the scan may reach.
 * @return the offset reached by the scan
 */
PRInt32
nsTextTransformer::ScanNormalUnicodeText_B(PRBool aForLineBreak,
                                           PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  const PRUnichar* cp0 = frag->Get2b();
  PRInt32 offset = mOffset - 1;

  PRUnichar firstChar = frag->CharAt(offset);

#ifdef IBMBIDI
  PRInt32 limit = (*aWordLen > 0) ? *aWordLen : 0;

  while (offset > limit && IS_BIDI_CONTROL(firstChar) ) {
    firstChar = frag->CharAt(--offset);
  }
#else
  // Without bidi support the scan may go all the way to the fragment start
  const PRInt32 limit = 0;
#endif

  mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = firstChar;
  if (firstChar > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);

  PRInt32 numChars = 1;

  if (offset > limit) {
    const PRUnichar* cp = cp0 + offset;
    PRBool breakBetween = PR_FALSE;
    if (aForLineBreak) {
      mLineBreaker->BreakInBetween(cp0, offset + 1,
                                   mTransformBuf.GetBufferEnd()-1, 1,
                                   &breakBetween);
    }
    else {
      mWordBreaker->BreakInBetween(cp0, offset + 1,
                                   mTransformBuf.GetBufferEnd()-1, 1,
                                   &breakBetween);
    }

    if (!breakBetween) {
      // Find next position
      PRBool tryPrevFrag;
      PRUint32 prev;
      if (aForLineBreak) {
        mLineBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
      }
      else {
        mWordBreaker->Prev(cp0, offset, offset, &prev, &tryPrevFrag);
      }
      numChars = (PRInt32) ((PRUint32) offset - prev) + 1;

      // Grow buffer before copying
      nsresult rv = mTransformBuf.GrowTo(numChars);
      if (NS_FAILED(rv)) {
        numChars = mTransformBuf.GetBufferLength();
      }

      // 1. convert nbsp into space
      // 2. check mHasMultibyte flag
      // 3. copy buffer
      PRUnichar* bp = mTransformBuf.GetBufferEnd() - 1;
      // The first character was stored before the buffer was grown.  If
      // growing reallocated the storage, the last slot of the new buffer
      // may no longer hold it, so store it again (a no-op when nothing
      // moved).
      *bp = firstChar;
      const PRUnichar* end = cp - numChars + 1;
      while (cp > end) {
        PRUnichar ch = *--cp;
        if (CH_NBSP == ch) {
          ch = ' ';
        }
        else if (IS_DISCARDED(ch)) {
          continue;
        }
        if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
        *--bp = ch;
      }

      // Recompute offset and numChars in case we stripped something
      offset = offset - numChars;
      numChars = mTransformBuf.GetBufferEnd() - bp;
    }
  }
  else
    offset--;

  *aWordLen = numChars;
  return offset;
}
|
|
|
|
|
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=t
|
|
|
|
/**
 * Backward scan over a run of whitespace in pre-wrap mode.
 *
 * Walks from mOffset - 1 towards the start of the fragment.  Every
 * character for which XP_IS_SPACE is true and that is neither a tab nor a
 * newline contributes one ' ' to the tail of the transform buffer.
 * Discarded characters are stepped over.  The scan stops at the first other
 * character, at the start of the fragment, or when the buffer cannot grow.
 *
 * @param aWordLen  out: number of spaces stored at the tail of the
 *                  transform buffer
 * @return the offset of the character that ended the run, or -1 when the
 *         start of the fragment was reached
 */
PRInt32
nsTextTransformer::ScanPreWrapWhiteSpace_B(PRInt32* aWordLen)
{
  const nsTextFragment* const text = mFrag;
  PRUnichar* out = mTransformBuf.GetBufferEnd();
  PRUnichar* outFloor = mTransformBuf.GetBuffer();
  PRInt32 pos = mOffset - 1;

  for (; pos >= 0; --pos) {
    const PRUnichar c = text->CharAt(pos);

    if (!(XP_IS_SPACE(c) && ('\t' != c) && ('\n' != c))) {
      // Keep looping if this is a discarded character
      if (IS_DISCARDED(c)) {
        continue;
      }
      break;
    }

    if (out == outFloor) {
      // The buffer is completely full.
      // NOTE(review): the pointer fix-up below presumes that GrowBy leaves
      // the characters written so far at the tail of the enlarged buffer --
      // confirm against mTransformBuf's GrowBy implementation.
      const PRInt32 filled = mTransformBuf.mBufferLen;
      if (NS_FAILED(mTransformBuf.GrowBy(1000))) {
        // If we run out of space (unlikely) then just chop the input
        break;
      }
      out = mTransformBuf.GetBufferEnd() - filled;
      outFloor = mTransformBuf.GetBuffer();
    }

    *--out = ' ';
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - out;
  return pos;
}
|
1999-10-16 03:36:07 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// wordlen==*aWordLen, contentlen=newOffset-currentOffset, isWhitespace=f
|
|
|
|
/**
 * Backward scan over preformatted data.
 *
 * Walks from mOffset - 1 towards the start of the fragment and copies the
 * characters, last one first, into the tail of the transform buffer until
 * a tab or newline is reached, the fragment start is passed, or the buffer
 * cannot grow.  A non-breaking space is copied as ' '; discarded characters
 * are skipped; any copied character above MAX_UNIBYTE marks the text as
 * multibyte.
 *
 * The returned offset is always one less than the offset of the first
 * consumed character (the caller adds the one back).  That also holds when
 * the buffer cannot grow: the character that could not be stored is the one
 * the returned offset names, exactly as if it had terminated the scan.
 *
 * @param aWordLen  out: number of characters stored at the tail of the
 *                  transform buffer
 * @return the offset of the character that ended the scan, or -1 when the
 *         start of the fragment was reached
 */
PRInt32
nsTextTransformer::ScanPreData_B(PRInt32* aWordLen)
{
  const nsTextFragment* frag = mFrag;
  PRInt32 offset = mOffset;
  PRUnichar* bp = mTransformBuf.GetBufferEnd();
  PRUnichar* startbp = mTransformBuf.GetBuffer();

  while (--offset >= 0) {
    PRUnichar ch = frag->CharAt(offset);
    if ((ch == '\t') || (ch == '\n')) {
      break;
    }
    if (CH_NBSP == ch) {
      ch = ' ';
    }
    else if (IS_DISCARDED(ch)) {
      continue;
    }
    if (ch > MAX_UNIBYTE) SetHasMultibyte(PR_TRUE);
    if (bp == startbp) {
      // NOTE(review): the pointer fix-up below presumes that GrowBy leaves
      // the characters written so far at the tail of the enlarged buffer --
      // confirm against mTransformBuf's GrowBy implementation.
      PRInt32 oldLength = mTransformBuf.mBufferLen;
      nsresult rv = mTransformBuf.GrowBy(1000);
      if (NS_FAILED(rv)) {
        // If we run out of space (unlikely) then just chop the input.
        // offset already names the character that was not stored; bumping
        // it here would make the caller under-count the consumed content
        // (and report a negative length when nothing had been copied).
        break;
      }
      bp = mTransformBuf.GetBufferEnd() - oldLength;
      startbp = mTransformBuf.GetBuffer();
    }
    *--bp = ch;
  }

  *aWordLen = mTransformBuf.GetBufferEnd() - bp;
  return offset;
}
|
1999-08-19 10:06:57 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
//----------------------------------------
|
1999-08-19 10:06:57 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
/**
 * Scans backward from mOffset for the previous "word" (a run of text or a
 * run of whitespace), stores its transformed form at the tail of the
 * transform buffer and moves mOffset back over the consumed content.
 *
 * Discarded characters directly before mOffset are skipped.  The first
 * remaining character and the current mode (normal, preformatted, pre-wrap)
 * select which Scan*_B helper produces the word.  Non-whitespace words then
 * get the CSS text-transform case conversion applied.
 *
 * @param aInWord             passed (negated) to the title-case converter
 *                            for text-transform: capitalize
 * @param aWordLenResult      out: length of the returned word.  In IBMBIDI
 *                            builds it is also read on entry: a positive
 *                            value is the lowest offset the scan may
 *                            reach, and it seeds the Unicode scanner.
 * @param aContentLenResult   out: mOffset on entry minus mOffset on exit
 * @param aIsWhiteSpaceResult out: PR_TRUE when the word is whitespace
 * @param aForLineBreak       true when scanning for line breaking; when
 *                            false the mode is forced to normal
 * @return pointer to the word at the tail of the transform buffer, or
 *         nsnull when no word was found
 */
PRUnichar*
nsTextTransformer::GetPrevWord(PRBool aInWord,
                               PRInt32* aWordLenResult,
                               PRInt32* aContentLenResult,
                               PRBool* aIsWhiteSpaceResult,
                               PRBool aForLineBreak)
{
  const nsTextFragment* const text = mFrag;
  PRInt32 pos = mOffset;
  PRInt32 len = 0;
  PRBool sawWhitespace = PR_FALSE;
  PRUnichar* word = nsnull;

  // Fix word breaking problem w/ PREFORMAT and PREWRAP
  // for word breaking, we should really go to the normal code
  if (!aForLineBreak && (mMode != eNormal)) {
    mMode = eNormal;
  }

#ifdef IBMBIDI
  const PRInt32 lowestPos = (*aWordLenResult > 0) ? *aWordLenResult : 0;
#else
  const PRInt32 lowestPos = 0;
#endif

  while (--pos >= lowestPos) {
    const PRUnichar c = text->CharAt(pos);

    // Eat up any discarded characters before dispatching
    if (IS_DISCARDED(c)) {
      continue;
    }

    const PRBool isTabOrNewline = ('\n' == c) || ('\t' == c);
    // Set when the character starts ordinary text, which normal and
    // pre-wrap mode scan in the same way
    PRBool scanAsText = PR_FALSE;

    if (ePreformatted == mMode) {
      if (isTabOrNewline) {
        mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = c;
        pos--;  // make sure we overshoot
        len = 1;
        sawWhitespace = PR_TRUE;
      }
      else {
        pos = ScanPreData_B(&len);
      }
    }
    else if (ePreWrap == mMode) {
      if (XP_IS_SPACE(c)) {
        if (isTabOrNewline) {
          mTransformBuf.mBuffer[mTransformBuf.mBufferLen-1] = c;
          pos--;  // make sure we overshoot
          len = 1;
        }
        else {
          pos = ScanPreWrapWhiteSpace_B(&len);
        }
        sawWhitespace = PR_TRUE;
      }
      else {
        scanAsText = PR_TRUE;
      }
    }
    else {
      // eNormal, and any mode value not handled above
      if (XP_IS_SPACE(c)) {
        pos = ScanNormalWhiteSpace_B();
        len = 1;
        sawWhitespace = PR_TRUE;
      }
      else if ((CH_NBSP == c) && !aForLineBreak) {
        // For word breaking a lone nbsp is reported as one whitespace word
        len = 1;
        sawWhitespace = PR_TRUE;
        mTransformBuf.mBuffer[mTransformBuf.mBufferLen - 1] = ' ';
        pos--;
      }
      else {
        scanAsText = PR_TRUE;
      }
    }

    if (scanAsText) {
      if (text->Is2b()) {
#ifdef IBMBIDI
        len = *aWordLenResult;
#endif
        pos = ScanNormalUnicodeText_B(aForLineBreak, &len);
      }
      else {
        pos = ScanNormalAsciiText_B(&len);
      }
    }

    // Backwards scanning routines *always* overshoot by one for the
    // returned offset value.
    pos += 1;

    word = mTransformBuf.GetBufferEnd() - len;

    if (!sawWhitespace) {
      const PRBool wantsCaseChange =
        (NS_STYLE_TEXT_TRANSFORM_CAPITALIZE == mTextTransform) ||
        (NS_STYLE_TEXT_TRANSFORM_LOWERCASE == mTextTransform) ||
        (NS_STYLE_TEXT_TRANSFORM_UPPERCASE == mTextTransform);

      // The case converter is only set up when a conversion is requested
      if (wantsCaseChange && NS_SUCCEEDED(EnsureCaseConv())) {
        if (NS_STYLE_TEXT_TRANSFORM_CAPITALIZE == mTextTransform) {
          gCaseConv->ToTitle(word, word, len, !aInWord);
        }
        else if (NS_STYLE_TEXT_TRANSFORM_LOWERCASE == mTextTransform) {
          gCaseConv->ToLower(word, word, len);
        }
        else {
          gCaseConv->ToUpper(word, word, len);
        }
      }
    }

    // Exactly one word is produced per call
    break;
  }

  *aWordLenResult = len;
  *aContentLenResult = mOffset - pos;
  *aIsWhiteSpaceResult = sawWhitespace;

  mOffset = pos;
  return word;
}
|
1999-02-22 06:20:59 +03:00
|
|
|
|
2002-06-12 01:00:20 +04:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
void
|
|
|
|
nsTextTransformer::DoArabicShaping(PRUnichar* aText,
|
|
|
|
PRInt32& aTextLength,
|
|
|
|
PRBool* aWasTransformed)
|
|
|
|
{
|
|
|
|
if (aTextLength <= 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
PRInt32 newLen;
|
|
|
|
PRBool isVisual;
|
|
|
|
mPresContext->IsVisualMode(isVisual);
|
|
|
|
|
|
|
|
nsAutoString buf;
|
|
|
|
buf.SetLength(aTextLength);
|
|
|
|
PRUnichar* buffer = (PRUnichar*)buf.get();
|
|
|
|
|
|
|
|
ArabicShaping(aText, buf.Length(), buffer, (PRUint32 *)&newLen, !isVisual, !isVisual);
|
|
|
|
|
|
|
|
PRUnichar *source = buffer;
|
|
|
|
PRUnichar *target = aText;
|
|
|
|
for (PRInt32 i = 0; i < newLen; i++) {
|
|
|
|
if (*source == CH_ZWNJ || *source == CH_ZWJ) {
|
|
|
|
source++;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*target++ = *source++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
aTextLength = target - aText;
|
|
|
|
*aWasTransformed = PR_TRUE;
|
|
|
|
}
|
1999-10-20 03:01:45 +04:00
|
|
|
|
2002-06-12 01:00:20 +04:00
|
|
|
void
|
|
|
|
nsTextTransformer::DoNumericShaping(PRUnichar* aText,
|
|
|
|
PRInt32& aTextLength,
|
|
|
|
PRBool* aWasTransformed)
|
|
|
|
{
|
|
|
|
if (aTextLength <= 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
PRUint32 bidiOptions;
|
|
|
|
mPresContext->GetBidi(&bidiOptions);
|
|
|
|
|
|
|
|
switch (GET_BIDI_OPTION_NUMERAL(bidiOptions)) {
|
|
|
|
|
|
|
|
case IBMBIDI_NUMERAL_HINDI:
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IBMBIDI_NUMERAL_ARABIC:
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IBMBIDI_NUMERAL_REGULAR:
|
|
|
|
|
|
|
|
switch (mCharType) {
|
|
|
|
|
|
|
|
case eCharType_EuropeanNumber:
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case eCharType_ArabicNumber:
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IBMBIDI_NUMERAL_HINDICONTEXT:
|
|
|
|
if (((GET_BIDI_OPTION_DIRECTION(bidiOptions)==IBMBIDI_TEXTDIRECTION_RTL) &&
|
|
|
|
(IS_ARABIC_DIGIT (aText[0]))) ||
|
|
|
|
(eCharType_ArabicNumber == mCharType))
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_HINDI);
|
|
|
|
else if (eCharType_EuropeanNumber == mCharType)
|
|
|
|
HandleNumbers(aText, aTextLength, IBMBIDI_NUMERAL_ARABIC);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
1999-10-20 03:01:45 +04:00
|
|
|
// Self test logic for this class. This will (hopefully) make sure
|
|
|
|
// that the forward and backward word iterator methods continue to
|
|
|
|
// function as people change things...
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
// The expected outcome of one self-test run: the word lengths the
// transformer should report, in forward order.
struct SelfTestSection {
  int   length;   // number of entries in |data|
  int*  data;     // expected wordLen values, first word first
};
|
|
|
|
|
|
|
|
#define NUM_MODES 3
|
|
|
|
|
|
|
|
struct SelfTestData {
|
|
|
|
const PRUnichar* text;
|
|
|
|
SelfTestSection modes[NUM_MODES];
|
|
|
|
};
|
|
|
|
|
|
|
|
// White-space style handed to Init2() for each mode index; entry i goes
// with SelfTestData::modes[i].
static PRUint8 preModeValue[NUM_MODES] = {
  NS_STYLE_WHITESPACE_NORMAL,       // modes[0]
  NS_STYLE_WHITESPACE_PRE,          // modes[1]
  NS_STYLE_WHITESPACE_MOZ_PRE_WRAP  // modes[2]
};
|
|
|
|
|
|
|
|
// Each test consists of a zero-terminated text and three tables of expected
// word lengths: normal, pre and pre-wrap white-space handling, in that order.
// A table whose first entry is 0 tells SelfTest not to complain when the
// number of words returned differs from the table length.

// Test 1: plain 7-bit text with spaces and a tab.
static PRUnichar test1text[] = {
  'o', 'n', 'c', 'e', ' ',
  'u', 'p', 'o', 'n', '\t',
  'a', ' ',
  's', 'h', 'o', 'r', 't', ' ',
  't', 'i', 'm', 'e',
  0
};
static int test1Results[]        = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test1PreResults[]     = { 9, 1, 12 };
static int test1PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };

// Test 2: same shape as test 1 with characters in the 0x80-0xFF range and
// a trailing space.
static PRUnichar test2text[] = {
  0xF6, 'n', 'c', 'e', ' ',
  0xFB, 'p', 'o', 'n', '\t',
  0xE3, ' ',
  's', 'h', 0xF3, 'r', 't', ' ',
  't', 0xEE, 'm', 'e', ' ',
  0
};
static int test2Results[]        = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };
static int test2PreResults[]     = { 9, 1, 13 };
static int test2PreWrapResults[] = { 4, 1, 4, 1, 1, 1, 5, 1, 4, 1 };

// Test 3: text containing a character above 0xFF.
static PRUnichar test3text[] = {
  0x0152, 'n', 'c', 'e', ' ',
  'x', 'y', '\t',
  'z', 'y', ' ',
  0
};
static int test3Results[]        = { 4, 1, 2, 1, 2, 1 };
static int test3PreResults[]     = { 7, 1, 3 };
static int test3PreWrapResults[] = { 4, 1, 2, 1, 2, 1 };

// Test 4: test 1 with CH_SHY characters mixed in.
static PRUnichar test4text[] = {
  'o', 'n', CH_SHY, 'c', 'e', ' ',
  CH_SHY, ' ',
  'u', 'p', 'o', 'n', '\t',
  'a', ' ',
  's', 'h', 'o', 'r', 't', ' ',
  't', 'i', 'm', 'e',
  0
};
static int test4Results[]        = { 4, 1, 4, 1, 1, 1, 5, 1, 4 };
static int test4PreResults[]     = { 10, 1, 12 };
static int test4PreWrapResults[] = { 4, 2, 4, 1, 1, 1, 5, 1, 4 };

// Test 5: nothing but a single CH_SHY.
static PRUnichar test5text[] = {
  CH_SHY,
  0
};
static int test5Results[]        = { 0 };
static int test5PreResults[]     = { 0 };
static int test5PreWrapResults[] = { 0 };

// Test 6 (compiled out): text mixing characters above 0xFF with 7-bit text.
#if 0
static PRUnichar test6text[] = {
  0x30d5, 0x30b8, 0x30c6, 0x30ec, 0x30d3, 0x306e, 0x97f3, 0x697d,
  0x756a, 0x7d44, 0x300c,
  'H', 'E', 'Y', '!', ' ',
  'H', 'E', 'Y', '!', '\t',
  'H', 'E', 'Y', '!',
  0x300d, 0x306e, 0x30db, 0x30fc, 0x30e0,
  0x30da, 0x30fc, 0x30b8, 0x3002,
  0
};
static int test6Results[] = {
  1, 1, 1, 1, 1,
  1, 1, 1, 1, 1,
  5, 1, 4, 1, 5,
  1, 2, 1, 2, 2
};
static int test6PreResults[] = { 20, 1, 13 };
static int test6PreWrapResults[] = {
  1, 1, 1, 1, 1,
  1, 1, 1, 1, 1,
  5, 1, 4, 1, 5,
  1, 2, 1, 2, 2
};
#endif
|
1999-02-23 05:27:54 +03:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
// Table of all self-test cases. Every entry pairs a <name>text array with
// its <name>Results, <name>PreResults and <name>PreWrapResults tables; the
// helper macros below only exist to spell that pattern once.
#define SELF_TEST_COUNT(array_) (sizeof(array_) / sizeof((array_)[0]))
#define SELF_TEST_ENTRY(name_)                                             \
  { name_##text,                                                           \
    { { SELF_TEST_COUNT(name_##Results),        name_##Results },          \
      { SELF_TEST_COUNT(name_##PreResults),     name_##PreResults },       \
      { SELF_TEST_COUNT(name_##PreWrapResults), name_##PreWrapResults } }  \
  }

static SelfTestData tests[] = {
  SELF_TEST_ENTRY(test1),
  SELF_TEST_ENTRY(test2),
  SELF_TEST_ENTRY(test3),
  SELF_TEST_ENTRY(test4),
  SELF_TEST_ENTRY(test5),
#if 0
  SELF_TEST_ENTRY(test6),
#endif
};

#undef SELF_TEST_ENTRY
#undef SELF_TEST_COUNT
|
1999-02-22 06:20:59 +03:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
#define NUM_TESTS (sizeof(tests) / sizeof(tests[0]))
|
1999-08-19 10:06:57 +04:00
|
|
|
|
1999-10-20 03:01:45 +04:00
|
|
|
void
|
|
|
|
nsTextTransformer::SelfTest(nsILineBreaker* aLineBreaker,
|
2000-09-21 03:00:32 +04:00
|
|
|
nsIWordBreaker* aWordBreaker,
|
|
|
|
nsIPresContext* aPresContext)
|
1999-08-19 10:06:57 +04:00
|
|
|
{
|
1999-10-20 03:01:45 +04:00
|
|
|
PRBool gNoisy = PR_FALSE;
|
|
|
|
if (PR_GetEnv("GECKO_TEXT_TRANSFORMER_NOISY_SELF_TEST")) {
|
|
|
|
gNoisy = PR_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
PRBool error = PR_FALSE;
|
|
|
|
PRInt32 testNum = 0;
|
|
|
|
SelfTestData* st = tests;
|
|
|
|
SelfTestData* last = st + NUM_TESTS;
|
|
|
|
for (; st < last; st++) {
|
|
|
|
PRUnichar* bp;
|
|
|
|
PRInt32 wordLen, contentLen;
|
2000-04-12 18:54:43 +04:00
|
|
|
PRBool ws, transformed;
|
1999-10-20 03:01:45 +04:00
|
|
|
|
|
|
|
PRBool isAsciiTest = PR_TRUE;
|
|
|
|
const PRUnichar* cp = st->text;
|
|
|
|
while (*cp) {
|
|
|
|
if (*cp > 255) {
|
|
|
|
isAsciiTest = PR_FALSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
}
|
|
|
|
|
|
|
|
nsTextFragment frag(st->text);
|
2000-09-21 03:00:32 +04:00
|
|
|
nsTextTransformer tx(aLineBreaker, aWordBreaker, aPresContext);
|
1999-10-20 03:01:45 +04:00
|
|
|
|
|
|
|
for (PRInt32 preMode = 0; preMode < NUM_MODES; preMode++) {
|
|
|
|
// Do forwards test
|
|
|
|
if (gNoisy) {
|
|
|
|
nsAutoString uc2(st->text);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("%s forwards test: '", isAsciiTest ? "ascii" : "unicode");
|
2001-10-16 07:53:44 +04:00
|
|
|
fputs(NS_LossyConvertUCS2toASCII(uc2).get(), stdout);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("'\n");
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
tx.Init2(&frag, 0, preModeValue[preMode], NS_STYLE_TEXT_TRANSFORM_NONE);
|
|
|
|
|
|
|
|
int* expectedResults = st->modes[preMode].data;
|
|
|
|
int resultsLen = st->modes[preMode].length;
|
|
|
|
|
2001-11-14 17:21:52 +03:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
wordLen = -1;
|
|
|
|
#endif
|
2000-04-12 18:54:43 +04:00
|
|
|
while ((bp = tx.GetNextWord(PR_FALSE, &wordLen, &contentLen, &ws, &transformed))) {
|
1999-10-20 03:01:45 +04:00
|
|
|
if (gNoisy) {
|
2000-10-29 02:17:53 +04:00
|
|
|
nsAutoString tmp(bp, wordLen);
|
|
|
|
printf(" '");
|
2001-10-16 07:53:44 +04:00
|
|
|
fputs(NS_LossyConvertUCS2toASCII(tmp).get(), stdout);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("': ws=%s wordLen=%d (%d) contentLen=%d (offset=%d)\n",
|
1999-10-20 03:01:45 +04:00
|
|
|
ws ? "yes" : "no",
|
|
|
|
wordLen, *expectedResults, contentLen, tx.mOffset);
|
|
|
|
}
|
|
|
|
if (*expectedResults != wordLen) {
|
|
|
|
error = PR_TRUE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
expectedResults++;
|
2001-11-14 17:21:52 +03:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
wordLen = -1;
|
|
|
|
#endif
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
if (expectedResults != st->modes[preMode].data + resultsLen) {
|
1999-10-26 03:04:51 +04:00
|
|
|
if (st->modes[preMode].data[0] != 0) {
|
|
|
|
error = PR_TRUE;
|
|
|
|
}
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Do backwards test
|
|
|
|
if (gNoisy) {
|
|
|
|
nsAutoString uc2(st->text);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("%s backwards test: '", isAsciiTest ? "ascii" : "unicode");
|
2001-10-16 07:53:44 +04:00
|
|
|
fputs(NS_LossyConvertUCS2toASCII(uc2).get(), stdout);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("'\n");
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
tx.Init2(&frag, frag.GetLength(), NS_STYLE_WHITESPACE_NORMAL,
|
|
|
|
NS_STYLE_TEXT_TRANSFORM_NONE);
|
|
|
|
expectedResults = st->modes[preMode].data + resultsLen;
|
2001-11-14 17:21:52 +03:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
wordLen = -1;
|
|
|
|
#endif
|
1999-10-20 03:01:45 +04:00
|
|
|
while ((bp = tx.GetPrevWord(PR_FALSE, &wordLen, &contentLen, &ws))) {
|
|
|
|
--expectedResults;
|
|
|
|
if (gNoisy) {
|
2000-10-29 02:17:53 +04:00
|
|
|
nsAutoString tmp(bp, wordLen);
|
|
|
|
printf(" '");
|
2001-10-16 07:53:44 +04:00
|
|
|
fputs(NS_LossyConvertUCS2toASCII(tmp).get(), stdout);
|
2000-10-29 02:17:53 +04:00
|
|
|
printf("': ws=%s wordLen=%d contentLen=%d (offset=%d)\n",
|
1999-10-20 03:01:45 +04:00
|
|
|
ws ? "yes" : "no",
|
|
|
|
wordLen, contentLen, tx.mOffset);
|
|
|
|
}
|
|
|
|
if (*expectedResults != wordLen) {
|
|
|
|
error = PR_TRUE;
|
|
|
|
break;
|
|
|
|
}
|
2001-11-14 17:21:52 +03:00
|
|
|
#ifdef IBMBIDI
|
|
|
|
wordLen = -1;
|
|
|
|
#endif
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
if (expectedResults != st->modes[preMode].data) {
|
1999-10-26 03:04:51 +04:00
|
|
|
if (st->modes[preMode].data[0] != 0) {
|
|
|
|
error = PR_TRUE;
|
|
|
|
}
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (error) {
|
2000-10-29 02:17:53 +04:00
|
|
|
fprintf(stderr, "nsTextTransformer: self test %d failed\n", testNum);
|
1999-10-20 03:01:45 +04:00
|
|
|
}
|
|
|
|
testNum++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (error) {
|
|
|
|
NS_ABORT();
|
1999-08-19 10:06:57 +04:00
|
|
|
}
|
|
|
|
}
|
1999-10-20 03:01:45 +04:00
|
|
|
|
|
|
|
/**
 * Debug-only initializer (called from SelfTest) that points the transformer
 * at a text fragment without going through a frame.
 *
 * @param aFrag            the text fragment to iterate over
 * @param aStartingOffset  initial offset; values outside
 *                         [0, aFrag->GetLength()] trigger a warning and are
 *                         clamped into that range
 * @param aWhiteSpace      NS_STYLE_WHITESPACE_PRE and
 *                         NS_STYLE_WHITESPACE_MOZ_PRE_WRAP select the
 *                         matching mode; for any other value mMode keeps
 *                         whatever it was before the call
 * @param aTextTransform   stored in mTextTransform
 * @return always NS_OK
 */
nsresult
nsTextTransformer::Init2(const nsTextFragment* aFrag,
                         PRInt32 aStartingOffset,
                         PRUint8 aWhiteSpace,
                         PRUint8 aTextTransform)
{
  mFrag = aFrag;
  mTextTransform = aTextTransform;

  // Sanitize the starting offset while storing it
  if (aStartingOffset < 0) {
    NS_WARNING("bad starting offset");
    mOffset = 0;
  }
  else if (aStartingOffset > mFrag->GetLength()) {
    NS_WARNING("bad starting offset");
    mOffset = mFrag->GetLength();
  }
  else {
    mOffset = aStartingOffset;
  }

  // Translate the white-space style into the scanning mode
  switch (aWhiteSpace) {
    case NS_STYLE_WHITESPACE_PRE:
      mMode = ePreformatted;
      break;

    case NS_STYLE_WHITESPACE_MOZ_PRE_WRAP:
      mMode = ePreWrap;
      break;

    default:
      // mMode is deliberately not touched here
      break;
  }

  return NS_OK;
}
|
1999-10-20 03:01:45 +04:00
|
|
|
#endif /* DEBUG */
|