/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * IBM Corporation. * Portions created by the Initial Developer are Copyright (C) 2000 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Simon Montagu * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #ifdef IBMBIDI #include "prmem.h" #include "nsBidi.h" #include "nsUnicodeProperties.h" #include "nsCRT.h" using namespace mozilla::unicode; // These are #defined in under Solaris 10 x86 #undef CS #undef ES /* Comparing the description of the Bidi algorithm with this implementation is easier with the same names for the Bidi types in the code as there. */ enum { L = eCharType_LeftToRight, R = eCharType_RightToLeft, EN = eCharType_EuropeanNumber, ES = eCharType_EuropeanNumberSeparator, ET = eCharType_EuropeanNumberTerminator, AN = eCharType_ArabicNumber, CS = eCharType_CommonNumberSeparator, B = eCharType_BlockSeparator, S = eCharType_SegmentSeparator, WS = eCharType_WhiteSpaceNeutral, O_N = eCharType_OtherNeutral, LRE = eCharType_LeftToRightEmbedding, LRO = eCharType_LeftToRightOverride, AL = eCharType_RightToLeftArabic, RLE = eCharType_RightToLeftEmbedding, RLO = eCharType_RightToLeftOverride, PDF = eCharType_PopDirectionalFormat, NSM = eCharType_DirNonSpacingMark, BN = eCharType_BoundaryNeutral, dirPropCount }; /* to avoid some conditional statements, use tiny constant arrays */ static Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; static Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; static Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] #define DIRPROP_FLAG_E(level) flagE[(level)&1] #define DIRPROP_FLAG_O(level) flagO[(level)&1] /* * General implementation notes: * * Throughout the implementation, there are comments like (W2) that refer to * rules of the Bidi algorithm in its version 5, in this example to the second * rule of the resolution of weak types. * * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) * character according to UTF-16, the second UChar gets the directional property of * the entire character assigned, while the first one gets a BN, a boundary * neutral, type, which is ignored by most of the algorithm according to * rule (X9) and the implementation suggestions of the Bidi algorithm. * * Later, AdjustWSLevels() will set the level for each BN to that of the * following character (UChar), which results in surrogate pairs getting the * same level on each of their surrogates. * * In a UTF-8 implementation, the same thing could be done: the last byte of * a multi-byte sequence would get the "real" property, while all previous * bytes of that sequence would get BN. * * It is not possible to assign all those parts of a character the same real * property because this would fail in the resolution of weak types with rules * that look at immediately surrounding types. * * As a related topic, this implementation does not remove Boundary Neutral * types from the input, but ignores them whenever this is relevant. * For example, the loop for the resolution of the weak types reads * types until it finds a non-BN. * Also, explicit embedding codes are neither changed into BN nor removed. * They are only treated the same way real BNs are. * As stated before, AdjustWSLevels() takes care of them at the end. * For the purpose of conformance, the levels of all these codes * do not matter. * * Note that this implementation never modifies the dirProps * after the initial setup. * * * In this implementation, the resolution of weak types (Wn), * neutrals (Nn), and the assignment of the resolved level (In) * are all done in one single loop, in ResolveImplicitLevels(). * Changes of dirProp values are done on the fly, without writing * them back to the dirProps array. * * * This implementation contains code that allows to bypass steps of the * algorithm that are not needed on the specific paragraph * in order to speed up the most common cases considerably, * like text that is entirely LTR, or RTL text without numbers. * * Most of this is done by setting a bit for each directional property * in a flags variable and later checking for whether there are * any LTR characters or any RTL characters, or both, whether * there are any explicit embedding codes, etc. * * If the (Xn) steps are performed, then the flags are re-evaluated, * because they will then not contain the embedding codes any more * and will be adjusted for override codes, so that subsequently * more bypassing may be possible than what the initial flags suggested. * * If the text is not mixed-directional, then the * algorithm steps for the weak type resolution are not performed, * and all levels are set to the paragraph level. * * If there are no explicit embedding codes, then the (Xn) steps * are not performed. * * If embedding levels are supplied as a parameter, then all * explicit embedding codes are ignored, and the (Xn) steps * are not performed. * * White Space types could get the level of the run they belong to, * and are checked with a test of (flags&MASK_EMBEDDING) to * consider if the paragraph direction should be considered in * the flags variable. * * If there are no White Space types in the paragraph, then * (L1) is not necessary in AdjustWSLevels(). */ nsBidi::nsBidi() { Init(); mMayAllocateText=true; mMayAllocateRuns=true; } nsBidi::~nsBidi() { Free(); } void nsBidi::Init() { /* reset the object, all pointers NULL, all flags false, all sizes 0 */ mLength = 0; mParaLevel = 0; mFlags = 0; mDirection = NSBIDI_LTR; mTrailingWSStart = 0; mDirPropsSize = 0; mLevelsSize = 0; mRunsSize = 0; mRunCount = -1; mDirProps=NULL; mLevels=NULL; mRuns=NULL; mDirPropsMemory=NULL; mLevelsMemory=NULL; mRunsMemory=NULL; mMayAllocateText=false; mMayAllocateRuns=false; } /* * We are allowed to allocate memory if aMemory==NULL or * aMayAllocate==true for each array that we need. * We also try to grow and shrink memory as needed if we * allocate it. * * Assume aSizeNeeded>0. * If *aMemory!=NULL, then assume *aSize>0. * * ### this realloc() may unnecessarily copy the old data, * which we know we don't need any more; * is this the best way to do this?? */ bool nsBidi::GetMemory(void **aMemory, PRSize *aSize, bool aMayAllocate, PRSize aSizeNeeded) { /* check for existing memory */ if(*aMemory==NULL) { /* we need to allocate memory */ if(!aMayAllocate) { return false; } else { *aMemory=PR_MALLOC(aSizeNeeded); if (*aMemory!=NULL) { *aSize=aSizeNeeded; return true; } else { *aSize=0; return false; } } } else { /* there is some memory, is it enough or too much? */ if(aSizeNeeded>*aSize && !aMayAllocate) { /* not enough memory, and we must not allocate */ return false; } else if(aSizeNeeded!=*aSize && aMayAllocate) { /* we may try to grow or shrink */ void *memory=PR_REALLOC(*aMemory, aSizeNeeded); if(memory!=NULL) { *aMemory=memory; *aSize=aSizeNeeded; return true; } else { /* we failed to grow */ return false; } } else { /* we have at least enough memory and must not allocate */ return true; } } } void nsBidi::Free() { PR_FREEIF(mDirPropsMemory); PR_FREEIF(mLevelsMemory); PR_FREEIF(mRunsMemory); } /* SetPara ------------------------------------------------------------ */ nsresult nsBidi::SetPara(const PRUnichar *aText, PRInt32 aLength, nsBidiLevel aParaLevel, nsBidiLevel *aEmbeddingLevels) { nsBidiDirection direction; /* check the argument values */ if(aText==NULL || ((NSBIDI_MAX_EXPLICIT_LEVEL=NSBIDI_MAX_EXPLICIT_LEVEL */ PRUint32 countOver60=0, countOver61=0; /* count overflows of explicit levels */ /* recalculate the flags */ flags=0; /* since we assume that this is a single paragraph, we ignore (X8) */ for(i=0; i0) { --countOver61; } else if(countOver60>0 && (embeddingLevel&~NSBIDI_LEVEL_OVERRIDE)!=NSBIDI_MAX_EXPLICIT_LEVEL) { /* handle LRx overflows from level 60 */ --countOver60; } else if(stackTop>0) { /* this is the pop operation; it also pops level 61 while countOver60>0 */ --stackTop; embeddingLevel=stack[stackTop]; /* } else { (underflow) */ } flags|=DIRPROP_FLAG(BN); break; case B: /* * We do not really expect to see a paragraph separator (B), * but we should do something reasonable with it, * especially at the end of the text. */ stackTop=0; countOver60=countOver61=0; embeddingLevel=level=mParaLevel; flags|=DIRPROP_FLAG(B); break; case BN: /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ /* they will get their levels set correctly in AdjustWSLevels() */ flags|=DIRPROP_FLAG(BN); break; default: /* all other types get the "real" level */ if(level!=embeddingLevel) { level=embeddingLevel; if(level&NSBIDI_LEVEL_OVERRIDE) { flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; } else { flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; } } if(!(level&NSBIDI_LEVEL_OVERRIDE)) { flags|=DIRPROP_FLAG(dirProp); } break; } /* * We need to set reasonable levels even on BN codes and * explicit codes because we will later look at same-level runs (X10). */ levels[i]=level; } if(flags&MASK_EMBEDDING) { flags|=DIRPROP_FLAG_LR(mParaLevel); } /* subsequently, ignore the explicit codes and BN (X9) */ /* again, determine if the text is mixed-directional or single-directional */ mFlags=flags; direction=DirectionFromFlags(flags); } return direction; } /* * Use a pre-specified embedding levels array: * * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), * ignore all explicit codes (X9), * and check all the preset levels. * * Recalculate the flags to have them reflect the real properties * after taking the explicit embeddings into account. */ nsresult nsBidi::CheckExplicitLevels(nsBidiDirection *aDirection) { const DirProp *dirProps=mDirProps; nsBidiLevel *levels=mLevels; PRInt32 i, length=mLength; Flags flags=0; /* collect all directionalities in the text */ nsBidiLevel level, paraLevel=mParaLevel; for(i=0; i>=EN_SHIFT; /* * Technically, this should be done before the switch() in the form * if(nextDirProp==NSM) { * dirProps[next]=nextDirProp=dirProp; * } * * - effectively one iteration ahead. * However, whether the next dirProp is NSM or is equal to the current dirProp * does not change the outcome of any condition in (W2)..(W7). */ break; default: break; } /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */ /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */ /* this is one iteration late for the neutrals */ if(DIRPROP_FLAG(dirProp)&MASK_N) { if(neutralStart<0) { /* start of a sequence of neutrals */ neutralStart=i; beforeNeutral=prevDirProp; } } else /* not a neutral, can be only one of { L, R, EN, AN } */ { /* * Note that all levels[] values are still the same at this * point because this function is called for an entire * same-level run. * Therefore, we need to read only one actual level. */ nsBidiLevel level=levels[i]; if(neutralStart>=0) { nsBidiLevel final; /* end of a sequence of neutrals (dirProp is "afterNeutral") */ if(beforeNeutral==L) { if(dirProp==L) { final=0; /* make all neutrals L (N1) */ } else { final=level; /* make all neutrals "e" (N2) */ } } else /* beforeNeutral is one of { R, EN, AN } */ { if(dirProp==L) { final=level; /* make all neutrals "e" (N2) */ } else { final=1; /* make all neutrals R (N1) */ } } /* perform (In) on the sequence of neutrals */ if((level^final)&1) { /* do something only if we need to _change_ the level */ do { ++levels[neutralStart]; } while(++neutralStart=0) { /* * Note that all levels[] values are still the same at this * point because this function is called for an entire * same-level run. * Therefore, we need to read only one actual level. */ nsBidiLevel level=levels[neutralStart], final; /* end of a sequence of neutrals (aEOR is "afterNeutral") */ if(beforeNeutral==L) { if(aEOR==L) { final=0; /* make all neutrals L (N1) */ } else { final=level; /* make all neutrals "e" (N2) */ } } else /* beforeNeutral is one of { R, EN, AN } */ { if(aEOR==L) { final=level; /* make all neutrals "e" (N2) */ } else { final=1; /* make all neutrals R (N1) */ } } /* perform (In) on the sequence of neutrals */ if((level^final)&1) { /* do something only if we need to _change_ the level */ do { ++levels[neutralStart]; } while(++neutralStart0) { /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { levels[i]=paraLevel; } /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ /* here, i+1 is guaranteed to be 0) { flag=DIRPROP_FLAG(dirProps[--i]); if(flag&MASK_BN_EXPLICIT) { levels[i]=levels[i+1]; } else if(flag&MASK_B_S) { levels[i]=paraLevel; break; } } } } /* now remove the NSBIDI_LEVEL_OVERRIDE flags, if any */ /* (a separate loop can be optimized more easily by a compiler) */ if(mFlags&MASK_OVERRIDE) { for(i=mTrailingWSStart; i>0;) { levels[--i]&=~NSBIDI_LEVEL_OVERRIDE; } } } nsresult nsBidi::GetDirection(nsBidiDirection* aDirection) { *aDirection = mDirection; return NS_OK; } nsresult nsBidi::GetParaLevel(nsBidiLevel* aParaLevel) { *aParaLevel = mParaLevel; return NS_OK; } #ifdef FULL_BIDI_ENGINE /* -------------------------------------------------------------------------- */ nsresult nsBidi::GetLength(PRInt32* aLength) { *aLength = mLength; return NS_OK; } /* * General remarks about the functions in this section: * * These functions deal with the aspects of potentially mixed-directional * text in a single paragraph or in a line of a single paragraph * which has already been processed according to * the Unicode 3.0 Bidi algorithm as defined in * http://www.unicode.org/unicode/reports/tr9/ , version 5, * also described in The Unicode Standard, Version 3.0 . * * This means that there is a nsBidi object with a levels * and a dirProps array. * paraLevel and direction are also set. * Only if the length of the text is zero, then levels==dirProps==NULL. * * The overall directionality of the paragraph * or line is used to bypass the reordering steps if possible. * Even purely RTL text does not need reordering there because * the getLogical/VisualIndex() functions can compute the * index on the fly in such a case. * * The implementation of the access to same-level-runs and of the reordering * do attempt to provide better performance and less memory usage compared to * a direct implementation of especially rule (L2) with an array of * one (32-bit) integer per text character. * * Here, the levels array is scanned as soon as necessary, and a vector of * same-level-runs is created. Reordering then is done on this vector. * For each run of text positions that were resolved to the same level, * only 8 bytes are stored: the first text position of the run and the visual * position behind the run after reordering. * One sign bit is used to hold the directionality of the run. * This is inefficient if there are many very short runs. If the average run * length is <2, then this uses more memory. * * In a further attempt to save memory, the levels array is never changed * after all the resolution rules (Xn, Wn, Nn, In). * Many functions have to consider the field trailingWSStart: * if it is less than length, then there is an implicit trailing run * at the paraLevel, * which is not reflected in the levels array. * This allows a line nsBidi object to use the same levels array as * its paragraph parent object. * * When a nsBidi object is created for a line of a paragraph, then the * paragraph's levels and dirProps arrays are reused by way of setting * a pointer into them, not by copying. This again saves memory and forbids to * change the now shared levels for (L1). */ nsresult nsBidi::SetLine(nsIBidi* aParaBidi, PRInt32 aStart, PRInt32 aLimit) { nsBidi* pParent = (nsBidi*)aParaBidi; PRInt32 length; /* check the argument values */ if(pParent==NULL) { return NS_ERROR_INVALID_POINTER; } else if(aStart<0 || aStart>aLimit || aLimit>pParent->mLength) { return NS_ERROR_INVALID_ARG; } /* set members from our aParaBidi parent */ length=mLength=aLimit-aStart; mParaLevel=pParent->mParaLevel; mRuns=NULL; mFlags=0; if(length>0) { mDirProps=pParent->mDirProps+aStart; mLevels=pParent->mLevels+aStart; mRunCount=-1; if(pParent->mDirection!=NSBIDI_MIXED) { /* the parent is already trivial */ mDirection=pParent->mDirection; /* * The parent's levels are all either * implicitly or explicitly ==paraLevel; * do the same here. */ if(pParent->mTrailingWSStart<=aStart) { mTrailingWSStart=0; } else if(pParent->mTrailingWSStartmTrailingWSStart-aStart; } else { mTrailingWSStart=length; } } else { const nsBidiLevel *levels=mLevels; PRInt32 i, trailingWSStart; nsBidiLevel level; Flags flags=0; SetTrailingWSStart(); trailingWSStart=mTrailingWSStart; /* recalculate pLineBidi->direction */ if(trailingWSStart==0) { /* all levels are at paraLevel */ mDirection=(nsBidiDirection)(mParaLevel&1); } else { /* get the level of the first character */ level=levels[0]&1; /* if there is anything of a different level, then the line is mixed */ if(trailingWSStart0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { --start; } /* if the WS run can be merged with the previous run then do so here */ while(start>0 && levels[start-1]==paraLevel) { --start; } mTrailingWSStart=start; } nsresult nsBidi::GetLevelAt(PRInt32 aCharIndex, nsBidiLevel* aLevel) { /* return paraLevel if in the trailing WS run, otherwise the real level */ if(aCharIndex<0 || mLength<=aCharIndex) { *aLevel = 0; } else if(mDirection!=NSBIDI_MIXED || aCharIndex>=mTrailingWSStart) { *aLevel = mParaLevel; } else { *aLevel = mLevels[aCharIndex]; } return NS_OK; } nsresult nsBidi::GetLevels(nsBidiLevel** aLevels) { PRInt32 start, length; length = mLength; if(length<=0) { *aLevels = NULL; return NS_ERROR_INVALID_ARG; } start = mTrailingWSStart; if(start==length) { /* the current levels array reflects the WS run */ *aLevels = mLevels; return NS_OK; } /* * After the previous if(), we know that the levels array * has an implicit trailing WS run and therefore does not fully * reflect itself all the levels. * This must be a nsBidi object for a line, and * we need to create a new levels array. */ if(GETLEVELSMEMORY(length)) { nsBidiLevel *levels=mLevelsMemory; if(start>0 && levels!=mLevels) { memcpy(levels, mLevels, start); } memset(levels+start, mParaLevel, length-start); /* this new levels array is set for the line and reflects the WS run */ mTrailingWSStart=length; *aLevels=mLevels=levels; return NS_OK; } else { /* out of memory */ *aLevels = NULL; return NS_ERROR_OUT_OF_MEMORY; } } #endif // FULL_BIDI_ENGINE nsresult nsBidi::GetCharTypeAt(PRInt32 aCharIndex, nsCharType* pType) { if(aCharIndex<0 || mLength<=aCharIndex) { return NS_ERROR_INVALID_ARG; } *pType = (nsCharType)mDirProps[aCharIndex]; return NS_OK; } nsresult nsBidi::GetLogicalRun(PRInt32 aLogicalStart, PRInt32 *aLogicalLimit, nsBidiLevel *aLevel) { PRInt32 length = mLength; if(aLogicalStart<0 || length<=aLogicalStart) { return NS_ERROR_INVALID_ARG; } if(mDirection!=NSBIDI_MIXED || aLogicalStart>=mTrailingWSStart) { if(aLogicalLimit!=NULL) { *aLogicalLimit=length; } if(aLevel!=NULL) { *aLevel=mParaLevel; } } else { nsBidiLevel *levels=mLevels; nsBidiLevel level=levels[aLogicalStart]; /* search for the end of the run */ length=mTrailingWSStart; while(++aLogicalStart=mRunCount ) { *aDirection = NSBIDI_LTR; return NS_OK; } else { PRInt32 start=mRuns[aRunIndex].logicalStart; if(aLogicalStart!=NULL) { *aLogicalStart=GET_INDEX(start); } if(aLength!=NULL) { if(aRunIndex>0) { *aLength=mRuns[aRunIndex].visualLimit- mRuns[aRunIndex-1].visualLimit; } else { *aLength=mRuns[0].visualLimit; } } *aDirection = (nsBidiDirection)GET_ODD_BIT(start); return NS_OK; } } /* compute the runs array --------------------------------------------------- */ /* * Compute the runs array from the levels array. * After GetRuns() returns true, runCount is guaranteed to be >0 * and the runs are reordered. * Odd-level runs have visualStart on their visual right edge and * they progress visually to the left. */ bool nsBidi::GetRuns() { if(mDirection!=NSBIDI_MIXED) { /* simple, single-run case - this covers length==0 */ GetSingleRun(mParaLevel); } else /* NSBIDI_MIXED, length>0 */ { /* mixed directionality */ PRInt32 length=mLength, limit=length; /* * If there are WS characters at the end of the line * and the run preceding them has a level different from * paraLevel, then they will form their own run at paraLevel (L1). * Count them separately. * We need some special treatment for this in order to not * modify the levels array which a line nsBidi object shares * with its paragraph parent and its other line siblings. * In other words, for the trailing WS, it may be * levels[]!=paraLevel but we have to treat it like it were so. */ limit=mTrailingWSStart; if(limit==0) { /* there is only WS on this line */ GetSingleRun(mParaLevel); } else { nsBidiLevel *levels=mLevels; PRInt32 i, runCount; nsBidiLevel level=NSBIDI_DEFAULT_LTR; /* initialize with no valid level */ /* count the runs, there is at least one non-WS run, and limit>0 */ runCount=0; for(i=0; i1 || limit1 */ if(GETRUNSMEMORY(runCount)) { runs=mRunsMemory; } else { return false; } /* set the runs */ /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ /* however, that would take longer and make other functions more complicated */ runIndex=0; /* search for the run ends */ start=0; level=levels[0]; if(levelmaxLevel) { maxLevel=level; } /* initialize visualLimit values with the run lengths */ for(i=1; imaxLevel) { maxLevel=level; } ++runIndex; } } /* finish the last run at i==limit */ runs[runIndex].logicalStart=start; runs[runIndex].visualLimit=limit-start; ++runIndex; if(limit1 and maxLevel>=minLevel>=paraLevel. * All the visualStart fields=logical start before reordering. * The "odd" bits are not set yet. * * Reordering with this data structure lends itself to some handy shortcuts: * * Since each run is moved but not modified, and since at the initial maxLevel * each sequence of same-level runs consists of only one run each, we * don't need to do anything there and can predecrement maxLevel. * In many simple cases, the reordering is thus done entirely in the * index mapping. * Also, reordering occurs only down to the lowest odd level that occurs, * which is minLevel|1. However, if the lowest level itself is odd, then * in the last reordering the sequence of the runs at this level or higher * will be all runs, and we don't need the elaborate loop to search for them. * This is covered by ++minLevel instead of minLevel|=1 followed * by an extra reorder-all after the reorder-some loop. * About a trailing WS run: * Such a run would need special treatment because its level is not * reflected in levels[] if this is not a paragraph object. * Instead, all characters from trailingWSStart on are implicitly at * paraLevel. * However, for all maxLevel>paraLevel, this run will never be reordered * and does not need to be taken into account. maxLevel==paraLevel is only reordered * if minLevel==paraLevel is odd, which is done in the extra segment. * This means that for the main reordering loop we don't need to consider * this run and can --runCount. If it is later part of the all-runs * reordering, then runCount is adjusted accordingly. */ void nsBidi::ReorderLine(nsBidiLevel aMinLevel, nsBidiLevel aMaxLevel) { Run *runs; nsBidiLevel *levels; PRInt32 firstRun, endRun, limitRun, runCount, temp; /* nothing to do? */ if(aMaxLevel<=(aMinLevel|1)) { return; } /* * Reorder only down to the lowest odd level * and reorder at an odd aMinLevel in a separate, simpler loop. * See comments above for why aMinLevel is always incremented. */ ++aMinLevel; runs=mRuns; levels=mLevels; runCount=mRunCount; /* do not include the WS run at paraLevel<=old aMinLevel except in the simple loop */ if(mTrailingWSStart=aMinLevel) { firstRun=0; /* loop for all sequences of runs */ for(;;) { /* look for a sequence of runs that are all at >=aMaxLevel */ /* look for the first run of such a sequence */ while(firstRun=runCount) { break; /* no more such runs */ } /* look for the limit run of such a sequence (the run behind it) */ for(limitRun=firstRun; ++limitRun=aMaxLevel;) {} /* Swap the entire sequence of runs from firstRun to limitRun-1. */ endRun=limitRun-1; while(firstRun=maxLevel */ /* look for the first index of such a sequence */ while(start=aLength) { break; /* no more such runs */ } /* look for the limit of such a sequence (the index behind it) */ for(limit=start; ++limit=maxLevel;) {} /* * Swap the entire interval of indexes from start to limit-1. * We don't need to swap the levels for the purpose of this * algorithm: the sequence of levels that we look at does not * move anyway. */ end=limit-1; while(start=minLevel); return NS_OK; } bool nsBidi::PrepareReorder(const nsBidiLevel *aLevels, PRInt32 aLength, PRInt32 *aIndexMap, nsBidiLevel *aMinLevel, nsBidiLevel *aMaxLevel) { PRInt32 start; nsBidiLevel level, minLevel, maxLevel; if(aLevels==NULL || aLength<=0) { return false; } /* determine minLevel and maxLevel */ minLevel=NSBIDI_MAX_EXPLICIT_LEVEL+1; maxLevel=0; for(start=aLength; start>0;) { level=aLevels[--start]; if(level>NSBIDI_MAX_EXPLICIT_LEVEL+1) { return false; } if(levelmaxLevel) { maxLevel=level; } } *aMinLevel=minLevel; *aMaxLevel=maxLevel; /* initialize the index map */ for(start=aLength; start>0;) { --start; aIndexMap[start]=start; } return true; } #ifdef FULL_BIDI_ENGINE /* API functions for logical<->visual mapping ------------------------------- */ nsresult nsBidi::GetVisualIndex(PRInt32 aLogicalIndex, PRInt32* aVisualIndex) { if(aLogicalIndex<0 || mLength<=aLogicalIndex) { return NS_ERROR_INVALID_ARG; } else { /* we can do the trivial cases without the runs array */ switch(mDirection) { case NSBIDI_LTR: *aVisualIndex = aLogicalIndex; return NS_OK; case NSBIDI_RTL: *aVisualIndex = mLength-aLogicalIndex-1; return NS_OK; default: if(mRunCount<0 && !GetRuns()) { return NS_ERROR_OUT_OF_MEMORY; } else { Run *runs=mRuns; PRInt32 i, visualStart=0, offset, length; /* linear search for the run, search on the visual runs */ for(i=0;; ++i) { length=runs[i].visualLimit-visualStart; offset=aLogicalIndex-GET_INDEX(runs[i].logicalStart); if(offset>=0 && offset=runs[i].visualLimit; ++i) {} } else { /* binary search for the run */ PRInt32 start=0, limit=runCount; /* the middle if() will guaranteed find the run, we don't need a loop limit */ for(;;) { i=(start+limit)/2; if(aVisualIndex>=runs[i].visualLimit) { start=i+1; } else if(i==0 || aVisualIndex>=runs[i-1].visualLimit) { break; } else { limit=i; } } } start=runs[i].logicalStart; if(IS_EVEN_RUN(start)) { /* LTR */ /* the offset in runs[i] is aVisualIndex-runs[i-1].visualLimit */ if(i>0) { aVisualIndex-=runs[i-1].visualLimit; } *aLogicalIndex = GET_INDEX(start)+aVisualIndex; return NS_OK; } else { /* RTL */ *aLogicalIndex = GET_INDEX(start)+runs[i].visualLimit-aVisualIndex-1; return NS_OK; } } } } } nsresult nsBidi::GetLogicalMap(PRInt32 *aIndexMap) { nsBidiLevel *levels; nsresult rv; /* GetLevels() checks all of its and our arguments */ rv = GetLevels(&levels); if(NS_FAILED(rv)) { return rv; } else if(aIndexMap==NULL) { return NS_ERROR_INVALID_ARG; } else { return ReorderLogical(levels, mLength, aIndexMap); } } nsresult nsBidi::GetVisualMap(PRInt32 *aIndexMap) { PRInt32* runCount=NULL; nsresult rv; /* CountRuns() checks all of its and our arguments */ rv = CountRuns(runCount); if(NS_FAILED(rv)) { return rv; } else if(aIndexMap==NULL) { return NS_ERROR_INVALID_ARG; } else { /* fill a visual-to-logical index map using the runs[] */ Run *runs=mRuns, *runsLimit=runs+mRunCount; PRInt32 logicalStart, visualStart, visualLimit; visualStart=0; for(; runslogicalStart; visualLimit=runs->visualLimit; if(IS_EVEN_RUN(logicalStart)) { do { /* LTR */ *aIndexMap++ = logicalStart++; } while(++visualStart=maxLevel */ /* look for the first index of such a sequence */ while(start=aLength) { break; /* no more such sequences */ } /* look for the limit of such a sequence (the index behind it) */ for(limit=start; ++limit=maxLevel;) {} /* * sos=start of sequence, eos=end of sequence * * The closed (inclusive) interval from sos to eos includes all the logical * and visual indexes within this sequence. They are logically and * visually contiguous and in the same range. * * For each run, the new visual index=sos+eos-old visual index; * we pre-add sos+eos into sumOfSosEos -> * new visual index=sumOfSosEos-old visual index; */ sumOfSosEos=start+limit-1; /* reorder each index in the sequence */ do { aIndexMap[start]=sumOfSosEos-aIndexMap[start]; } while(++start=minLevel); return NS_OK; } nsresult nsBidi::InvertMap(const PRInt32 *aSrcMap, PRInt32 *aDestMap, PRInt32 aLength) { if(aSrcMap!=NULL && aDestMap!=NULL) { aSrcMap+=aLength; while(aLength>0) { aDestMap[*--aSrcMap]=--aLength; } } return NS_OK; } PRInt32 nsBidi::doWriteReverse(const PRUnichar *src, PRInt32 srcLength, PRUnichar *dest, PRUint16 options) { /* * RTL run - * * RTL runs need to be copied to the destination in reverse order * of code points, not code units, to keep Unicode characters intact. * * The general strategy for this is to read the source text * in backward order, collect all code units for a code point * (and optionally following combining characters, see below), * and copy all these code units in ascending order * to the destination for this run. * * Several options request whether combining characters * should be kept after their base characters, * whether Bidi control characters should be removed, and * whether characters should be replaced by their mirror-image * equivalent Unicode characters. */ PRInt32 i, j, destSize; PRUint32 c; /* optimize for several combinations of options */ switch(options&(NSBIDI_REMOVE_BIDI_CONTROLS|NSBIDI_DO_MIRRORING|NSBIDI_KEEP_BASE_COMBINING)) { case 0: /* * With none of the "complicated" options set, the destination * run will have the same length as the source run, * and there is no mirroring and no keeping combining characters * with their base characters. */ destSize=srcLength; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units for one base character */ UTF_BACK_1(src, 0, srcLength); /* copy this base character */ j=srcLength; do { *dest++=src[j++]; } while(j0); break; case NSBIDI_KEEP_BASE_COMBINING: /* * Here, too, the destination * run will have the same length as the source run, * and there is no mirroring. * We do need to keep combining characters with their base characters. */ destSize=srcLength; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units and modifier letters for one base character */ do { UTF_PREV_CHAR(src, 0, srcLength, c); } while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)); /* copy this "user character" */ j=srcLength; do { *dest++=src[j++]; } while(j0); break; default: /* * With several "complicated" options set, this is the most * general and the slowest copying of an RTL run. * We will do mirroring, remove Bidi controls, and * keep combining characters with their base characters * as requested. */ if(!(options&NSBIDI_REMOVE_BIDI_CONTROLS)) { i=srcLength; } else { /* we need to find out the destination length of the run, which will not include the Bidi control characters */ PRInt32 length=srcLength; PRUnichar ch; i=0; do { ch=*src++; if (!IsBidiControl((PRUint32)ch)) { ++i; } } while(--length>0); src-=srcLength; } destSize=i; /* preserve character integrity */ do { /* i is always after the last code unit known to need to be kept in this segment */ i=srcLength; /* collect code units for one base character */ UTF_PREV_CHAR(src, 0, srcLength, c); if(options&NSBIDI_KEEP_BASE_COMBINING) { /* collect modifier letters for this base character */ while(srcLength>0 && IsBidiCategory(c, eBidiCat_NSM)) { UTF_PREV_CHAR(src, 0, srcLength, c); } } if(options&NSBIDI_REMOVE_BIDI_CONTROLS && IsBidiControl(c)) { /* do not copy this Bidi control character */ continue; } /* copy this "user character" */ j=srcLength; if(options&NSBIDI_DO_MIRRORING) { /* mirror only the base character */ c = SymmSwap(c); PRInt32 k=0; UTF_APPEND_CHAR_UNSAFE(dest, k, c); dest+=k; j+=k; } while(j0); break; } /* end of switch */ return destSize; } nsresult nsBidi::WriteReverse(const PRUnichar *aSrc, PRInt32 aSrcLength, PRUnichar *aDest, PRUint16 aOptions, PRInt32 *aDestSize) { if( aSrc==NULL || aSrcLength<0 || aDest==NULL ) { return NS_ERROR_INVALID_ARG; } /* do input and output overlap? */ if( aSrc>=aDest && aSrc=aSrc && aDest0) { *aDestSize = doWriteReverse(aSrc, aSrcLength, aDest, aOptions); } return NS_OK; } #endif // FULL_BIDI_ENGINE #endif // IBMBIDI