gecko-dev/dom/base/nsLineBreaker.cpp

504 строки
18 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsLineBreaker.h"
#include "nsContentUtils.h"
#include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
#include "nsHyphenationManager.h"
#include "nsHyphenator.h"
#include "mozilla/gfx/2D.h"
#include "mozilla/intl/LineBreaker.h"
#include "mozilla/intl/MozLocale.h"
using mozilla::intl::LineBreaker;
using mozilla::intl::Locale;
nsLineBreaker::nsLineBreaker()
: mCurrentWordLanguage(nullptr),
mCurrentWordContainsMixedLang(false),
mCurrentWordContainsComplexChar(false),
mScriptIsChineseOrJapanese(false),
mAfterBreakableSpace(false),
mBreakHere(false),
mWordBreak(LineBreaker::WordBreak::Normal),
mStrictness(LineBreaker::Strictness::Auto),
mWordContinuation(false) {}
nsLineBreaker::~nsLineBreaker() {
NS_ASSERTION(mCurrentWord.Length() == 0,
"Should have Reset() before destruction!");
}
static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
bool* aCapitalization) {
// Capitalize the first alphanumeric character after a space or start
// of the word.
// The only space character a word can contain is NBSP.
bool capitalizeNextChar = true;
for (uint32_t i = 0; i < aLength; ++i) {
uint32_t ch = aWord[i];
if (capitalizeNextChar) {
if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
NS_IS_LOW_SURROGATE(aWord[i + 1])) {
ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
}
if (nsContentUtils::IsAlphanumeric(ch)) {
aCapitalization[i] = true;
capitalizeNextChar = false;
}
if (!IS_IN_BMP(ch)) {
++i;
}
}
if (ch == 0xA0 /*NBSP*/) {
capitalizeNextChar = true;
}
}
}
nsresult nsLineBreaker::FlushCurrentWord() {
uint32_t length = mCurrentWord.Length();
AutoTArray<uint8_t, 4000> breakState;
if (!breakState.AppendElements(length)) {
return NS_ERROR_OUT_OF_MEMORY;
}
nsTArray<bool> capitalizationState;
if (mStrictness == LineBreaker::Strictness::Anywhere) {
memset(breakState.Elements(),
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
length * sizeof(uint8_t));
} else if (!mCurrentWordContainsComplexChar) {
// For break-strict set everything internal to "break", otherwise
// to "no break"!
memset(breakState.Elements(),
mWordBreak == LineBreaker::WordBreak::BreakAll
? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
length * sizeof(uint8_t));
} else {
nsContentUtils::LineBreaker()->GetJISx4051Breaks(
mCurrentWord.Elements(), length, mWordBreak, mStrictness,
mScriptIsChineseOrJapanese, breakState.Elements());
}
bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
uint32_t i;
for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
TextItem* ti = &mTextItems[i];
if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
autoHyphenate = false;
}
}
if (autoHyphenate) {
RefPtr<nsHyphenator> hyphenator =
nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
if (hyphenator) {
FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
mCurrentWord.Elements() + length,
breakState.Elements());
}
}
uint32_t offset = 0;
for (i = 0; i < mTextItems.Length(); ++i) {
TextItem* ti = &mTextItems[i];
NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
}
if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
memset(breakState.Elements() + offset + exclude,
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
(ti->mLength - exclude) * sizeof(uint8_t));
}
// Don't set the break state for the first character of the word, because
// it was already set correctly earlier and we don't know what the true
// value should be.
uint32_t skipSet = i == 0 ? 1 : 0;
if (ti->mSink) {
ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
breakState.Elements() + offset + skipSet);
if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
if (capitalizationState.Length() == 0) {
if (!capitalizationState.AppendElements(length))
return NS_ERROR_OUT_OF_MEMORY;
memset(capitalizationState.Elements(), false, length * sizeof(bool));
SetupCapitalization(mCurrentWord.Elements(), length,
capitalizationState.Elements());
}
ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
capitalizationState.Elements() + offset);
}
}
offset += ti->mLength;
}
mCurrentWord.Clear();
mTextItems.Clear();
mCurrentWordContainsComplexChar = false;
mCurrentWordContainsMixedLang = false;
mCurrentWordLanguage = nullptr;
mWordContinuation = false;
return NS_OK;
}
// If the aFlags parameter to AppendText has all these bits set,
// then we don't need to worry about finding break opportunities
// in the appended text.
#define NO_BREAKS_NEEDED_FLAGS \
(BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
BREAK_SKIP_SETTING_NO_BREAKS)
nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
const char16_t* aText, uint32_t aLength,
uint32_t aFlags, nsILineBreakSink* aSink) {
NS_ASSERTION(aLength > 0, "Appending empty text...");
uint32_t offset = 0;
// Continue the current word
if (mCurrentWord.Length() > 0) {
NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
"These should not be set");
while (offset < aLength && !IsSpace(aText[offset])) {
mCurrentWord.AppendElement(aText[offset]);
if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
mCurrentWordContainsComplexChar = true;
}
UpdateCurrentWordLanguage(aHyphenationLanguage);
++offset;
}
if (offset > 0) {
mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
}
if (offset == aLength) return NS_OK;
// We encountered whitespace, so we're done with this word
nsresult rv = FlushCurrentWord();
if (NS_FAILED(rv)) return rv;
}
AutoTArray<uint8_t, 4000> breakState;
if (aSink) {
if (!breakState.AppendElements(aLength)) return NS_ERROR_OUT_OF_MEMORY;
}
bool noCapitalizationNeeded = true;
nsTArray<bool> capitalizationState;
if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
if (!capitalizationState.AppendElements(aLength))
return NS_ERROR_OUT_OF_MEMORY;
memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
noCapitalizationNeeded = false;
}
uint32_t start = offset;
bool noBreaksNeeded =
!aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
!mBreakHere && !mAfterBreakableSpace);
if (noBreaksNeeded && noCapitalizationNeeded) {
// Skip to the space before the last word, since either the break data
// here is not needed, or no breaks are set in the sink and there cannot
// be any breaks in this chunk; and we don't need to do word-initial
// capitalization. All we need is the context for the next chunk (if any).
offset = aLength;
while (offset > start) {
--offset;
if (IsSpace(aText[offset])) break;
}
}
uint32_t wordStart = offset;
bool wordHasComplexChar = false;
RefPtr<nsHyphenator> hyphenator;
if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
!(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
hyphenator =
nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
}
for (;;) {
char16_t ch = aText[offset];
bool isSpace = IsSpace(ch);
bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
if (aSink && !noBreaksNeeded) {
breakState[offset] =
mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
mWordBreak == LineBreaker::WordBreak::BreakAll ||
mStrictness == LineBreaker::Strictness::Anywhere
? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
}
mBreakHere = false;
mAfterBreakableSpace = isBreakableSpace;
if (isSpace || ch == '\n') {
if (offset > wordStart && aSink) {
if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
if (mStrictness == LineBreaker::Strictness::Anywhere) {
memset(breakState.Elements() + wordStart,
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
offset - wordStart);
} else if (wordHasComplexChar) {
// Save current start-of-word state because GetJISx4051Breaks will
// set it to false
uint8_t currentStart = breakState[wordStart];
nsContentUtils::LineBreaker()->GetJISx4051Breaks(
aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
breakState[wordStart] = currentStart;
}
if (hyphenator) {
FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
breakState.Elements() + wordStart);
}
}
if (!mWordContinuation && !noCapitalizationNeeded) {
SetupCapitalization(aText + wordStart, offset - wordStart,
capitalizationState.Elements() + wordStart);
}
}
wordHasComplexChar = false;
mWordContinuation = false;
++offset;
if (offset >= aLength) break;
wordStart = offset;
} else {
if (!wordHasComplexChar && IsComplexChar(ch)) {
wordHasComplexChar = true;
}
++offset;
if (offset >= aLength) {
// Save this word
mCurrentWordContainsComplexChar = wordHasComplexChar;
uint32_t len = offset - wordStart;
char16_t* elems = mCurrentWord.AppendElements(len);
if (!elems) return NS_ERROR_OUT_OF_MEMORY;
memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
// Ensure that the break-before for this word is written out
offset = wordStart + 1;
UpdateCurrentWordLanguage(aHyphenationLanguage);
break;
}
}
}
if (aSink) {
if (!noBreaksNeeded) {
aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
}
if (!noCapitalizationNeeded) {
aSink->SetCapitalization(start, offset - start,
capitalizationState.Elements() + start);
}
}
return NS_OK;
}
void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
const char16_t* aTextStart,
const char16_t* aTextLimit,
uint8_t* aBreakState) {
nsDependentSubstring string(aTextStart, aTextLimit);
AutoTArray<bool, 200> hyphens;
if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
if (hyphens[i]) {
aBreakState[i + 1] =
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
}
}
}
}
nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
const uint8_t* aText, uint32_t aLength,
uint32_t aFlags, nsILineBreakSink* aSink) {
NS_ASSERTION(aLength > 0, "Appending empty text...");
if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
// Defer to the Unicode path if capitalization or hyphenation is required
nsAutoString str;
const char* cp = reinterpret_cast<const char*>(aText);
CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
}
uint32_t offset = 0;
// Continue the current word
if (mCurrentWord.Length() > 0) {
NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
"These should not be set");
while (offset < aLength && !IsSpace(aText[offset])) {
mCurrentWord.AppendElement(aText[offset]);
if (!mCurrentWordContainsComplexChar &&
IsComplexASCIIChar(aText[offset])) {
mCurrentWordContainsComplexChar = true;
}
++offset;
}
if (offset > 0) {
mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
}
if (offset == aLength) {
// We did not encounter whitespace so the word hasn't finished yet.
return NS_OK;
}
// We encountered whitespace, so we're done with this word
nsresult rv = FlushCurrentWord();
if (NS_FAILED(rv)) return rv;
}
AutoTArray<uint8_t, 4000> breakState;
if (aSink) {
if (!breakState.AppendElements(aLength)) return NS_ERROR_OUT_OF_MEMORY;
}
uint32_t start = offset;
bool noBreaksNeeded =
!aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
!mBreakHere && !mAfterBreakableSpace);
if (noBreaksNeeded) {
// Skip to the space before the last word, since either the break data
// here is not needed, or no breaks are set in the sink and there cannot
// be any breaks in this chunk; all we need is the context for the next
// chunk (if any)
offset = aLength;
while (offset > start) {
--offset;
if (IsSpace(aText[offset])) break;
}
}
uint32_t wordStart = offset;
bool wordHasComplexChar = false;
for (;;) {
uint8_t ch = aText[offset];
bool isSpace = IsSpace(ch);
bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
if (aSink) {
// Consider word-break style. Since the break position of CJK scripts
// will be set by nsILineBreaker, we don't consider CJK at this point.
breakState[offset] =
mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
mWordBreak == LineBreaker::WordBreak::BreakAll ||
mStrictness == LineBreaker::Strictness::Anywhere
? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
: gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
}
mBreakHere = false;
mAfterBreakableSpace = isBreakableSpace;
if (isSpace) {
if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
if (mStrictness == LineBreaker::Strictness::Anywhere) {
memset(breakState.Elements() + wordStart,
gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
offset - wordStart);
} else if (wordHasComplexChar) {
// Save current start-of-word state because GetJISx4051Breaks will
// set it to false
uint8_t currentStart = breakState[wordStart];
nsContentUtils::LineBreaker()->GetJISx4051Breaks(
aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
breakState[wordStart] = currentStart;
}
}
wordHasComplexChar = false;
++offset;
if (offset >= aLength) break;
wordStart = offset;
} else {
if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
wordHasComplexChar = true;
}
++offset;
if (offset >= aLength) {
// Save this word
mCurrentWordContainsComplexChar = wordHasComplexChar;
uint32_t len = offset - wordStart;
char16_t* elems = mCurrentWord.AppendElements(len);
if (!elems) return NS_ERROR_OUT_OF_MEMORY;
uint32_t i;
for (i = wordStart; i < offset; ++i) {
elems[i - wordStart] = aText[i];
}
mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
// Ensure that the break-before for this word is written out
offset = wordStart + 1;
break;
}
}
}
if (!noBreaksNeeded) {
aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
}
return NS_OK;
}
void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
mCurrentWordContainsMixedLang = true;
mScriptIsChineseOrJapanese = false;
} else {
if (aHyphenationLanguage && !mCurrentWordLanguage) {
Locale loc = Locale(nsAtomCString(aHyphenationLanguage));
if (loc.GetScript().IsEmpty()) {
loc.AddLikelySubtags();
}
const nsCString& script = loc.GetScript();
mScriptIsChineseOrJapanese =
script.EqualsLiteral("Hans") || script.EqualsLiteral("Hant") ||
script.EqualsLiteral("Jpan") || script.EqualsLiteral("Hrkt");
}
mCurrentWordLanguage = aHyphenationLanguage;
}
}
nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
nsresult rv = FlushCurrentWord();
if (NS_FAILED(rv)) return rv;
bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
if (mAfterBreakableSpace && !isBreakableSpace) {
mBreakHere = true;
}
mAfterBreakableSpace = isBreakableSpace;
mWordContinuation = false;
return NS_OK;
}
nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
nsresult rv = FlushCurrentWord();
if (NS_FAILED(rv)) return rv;
*aTrailingBreak = mBreakHere || mAfterBreakableSpace;
mBreakHere = false;
mAfterBreakableSpace = false;
return NS_OK;
}