2018-11-30 22:52:05 +03:00
|
|
|
/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-29 19:52:43 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
|
|
/*
|
2010-06-11 23:14:38 +04:00
|
|
|
* This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
|
|
|
|
* for use within Mozilla Gecko, separate from a standard ICU build.
|
|
|
|
*
|
|
|
|
* The original ICU license of the code follows:
|
|
|
|
*
|
|
|
|
* ICU License - ICU 1.8.1 and later
|
|
|
|
*
|
|
|
|
* COPYRIGHT AND PERMISSION NOTICE
|
|
|
|
*
|
|
|
|
* Copyright (c) 1995-2009 International Business Machines Corporation and
|
|
|
|
* others
|
|
|
|
*
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, provided that the above copyright notice(s) and this
|
|
|
|
* permission notice appear in all copies of the Software and that both the
|
|
|
|
* above copyright notice(s) and this permission notice appear in supporting
|
|
|
|
* documentation.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
|
|
|
|
* BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
|
|
|
|
* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
|
|
|
* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
|
|
|
* SOFTWARE.
|
|
|
|
*
|
|
|
|
* Except as contained in this notice, the name of a copyright holder shall
|
|
|
|
* not be used in advertising or otherwise to promote the sale, use or other
|
|
|
|
* dealings in this Software without prior written authorization of the
|
|
|
|
* copyright holder.
|
|
|
|
*
|
|
|
|
* All trademarks and registered trademarks mentioned herein are the property
|
|
|
|
* of their respective owners.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "gfxScriptItemizer.h"
|
2012-02-24 14:15:46 +04:00
|
|
|
#include "nsUnicodeProperties.h"
|
2010-06-11 23:14:38 +04:00
|
|
|
#include "nsCharTraits.h"
|
2013-10-08 03:15:59 +04:00
|
|
|
#include "harfbuzz/hb.h"
|
2020-05-19 17:30:20 +03:00
|
|
|
#include "unicode/uscript.h"
|
2010-06-11 23:14:38 +04:00
|
|
|
|
2017-02-23 05:29:46 +03:00
|
|
|
using namespace mozilla::unicode;
|
|
|
|
|
2010-06-11 23:14:38 +04:00
|
|
|
/*
 * Helpers for the circular paren stack. Indices wrap modulo
 * PAREN_STACK_DEPTH; the counters saturate at PAREN_STACK_DEPTH.
 * STACK_IS_*, TOP and SYNC_FIXUP refer to the identifiers pushCount,
 * parenStack, parenSP and fixupCount by name, so they can only be used
 * where those names are in scope.
 */

/* Wrap a stack index into the range of the backing array. */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)

/* Increment a counter, but never beyond PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) \
  (((sp) < PAREN_STACK_DEPTH) ? (sp) + 1 : PAREN_STACK_DEPTH)

/* Advance a stack index by |count| (or by one), wrapping around. */
#define INC(sp, count) (MOD((sp) + (count)))
#define INC1(sp) (MOD((sp) + (1)))

/* Step a stack index back by |count| (or by one), wrapping around. The
 * added PAREN_STACK_DEPTH keeps the left operand of % non-negative. */
#define DEC(sp, count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (MOD((sp) + PAREN_STACK_DEPTH - (1)))

/* Stack occupancy tests, based on the number of live entries. */
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (pushCount > 0)

/* The most recently pushed entry. */
#define TOP() (parenStack[parenSP])

/* Forget any entries that were waiting for a script fixup. */
#define SYNC_FIXUP() (fixupCount = 0)
|
|
|
|
|
2016-04-21 20:58:59 +03:00
|
|
|
void gfxScriptItemizer::push(uint32_t endPairChar, Script newScriptCode) {
|
2010-06-11 23:14:38 +04:00
|
|
|
pushCount = LIMIT_INC(pushCount);
|
|
|
|
fixupCount = LIMIT_INC(fixupCount);
|
|
|
|
|
|
|
|
parenSP = INC1(parenSP);
|
2012-06-20 23:58:18 +04:00
|
|
|
parenStack[parenSP].endPairChar = endPairChar;
|
2015-09-12 06:30:14 +03:00
|
|
|
parenStack[parenSP].scriptCode = newScriptCode;
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void gfxScriptItemizer::pop() {
|
|
|
|
if (STACK_IS_EMPTY()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fixupCount > 0) {
|
|
|
|
fixupCount -= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
pushCount -= 1;
|
|
|
|
parenSP = DEC1(parenSP);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2010-06-11 23:14:38 +04:00
|
|
|
/* If the stack is now empty, reset the stack
|
|
|
|
pointers to their initial values.
|
|
|
|
*/
|
|
|
|
if (STACK_IS_EMPTY()) {
|
|
|
|
parenSP = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-21 20:58:59 +03:00
|
|
|
/**
 * Assign newScriptCode to the topmost fixupCount entries of the paren
 * stack, i.e. the entries counted since fixupCount was last reset.
 *
 * @param newScriptCode the script to store in each pending entry.
 */
void gfxScriptItemizer::fixup(Script newScriptCode) {
  // Start one slot below the oldest pending entry; each iteration steps
  // forward (with wrap-around) before writing.
  int32_t cursor = DEC(parenSP, fixupCount);

  for (; fixupCount > 0; --fixupCount) {
    cursor = INC1(cursor);
    parenStack[cursor].scriptCode = newScriptCode;
  }

  // fixupCount finishes one lower than where the loop stopped (-1 when it
  // started non-negative), so a following push() brings it to 0, not 1.
  --fixupCount;
}
|
|
|
|
|
2020-05-15 16:28:54 +03:00
|
|
|
// Returns true for scripts that do not force a run boundary on their own:
// Script::UNKNOWN, and any value ordered at or before Script::INHERITED
// (presumably COMMON and INHERITED -- confirm against the Script enum).
static inline bool CanMergeWithContext(Script aScript) {
  if (aScript <= Script::INHERITED) {
    return true;
  }
  return aScript == Script::UNKNOWN;
}
|
|
|
|
|
2017-01-18 23:38:05 +03:00
|
|
|
// We regard the current char as having the same script as the in-progress run
|
2020-05-15 16:28:54 +03:00
|
|
|
// if either script is Common/Inherited/Unknown, or if the run script appears
|
2017-01-18 23:38:05 +03:00
|
|
|
// in the character's ScriptExtensions, or if the char is a cluster extender.
|
|
|
|
static inline bool SameScript(Script runScript, Script currCharScript,
|
|
|
|
uint32_t aCurrCh) {
|
2020-05-15 16:28:54 +03:00
|
|
|
return CanMergeWithContext(runScript) ||
|
|
|
|
CanMergeWithContext(currCharScript) || currCharScript == runScript ||
|
2017-01-18 23:38:05 +03:00
|
|
|
IsClusterExtender(aCurrCh) || HasScript(aCurrCh, runScript);
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|
|
|
|
|
2014-01-04 19:02:17 +04:00
|
|
|
/**
 * Create an itemizer over the given UTF-16 text.
 *
 * @param src    pointer to the text; only the pointer is stored, the text
 *               is not copied.
 * @param length number of char16_t code units in src.
 */
gfxScriptItemizer::gfxScriptItemizer(const char16_t* src, uint32_t length) {
  // SetText stores the pointer and length and then calls reset().
  SetText(src, length);
}
|
|
|
|
|
2014-01-04 19:02:17 +04:00
|
|
|
void gfxScriptItemizer::SetText(const char16_t* src, uint32_t length) {
|
2010-06-11 23:14:38 +04:00
|
|
|
textPtr = src;
|
|
|
|
textLength = length;
|
|
|
|
|
|
|
|
reset();
|
|
|
|
}
|
|
|
|
|
2012-08-22 19:56:38 +04:00
|
|
|
bool gfxScriptItemizer::Next(uint32_t& aRunStart, uint32_t& aRunLimit,
|
2016-04-21 20:58:59 +03:00
|
|
|
Script& aRunScript) {
|
2010-06-11 23:14:38 +04:00
|
|
|
/* if we've fallen off the end of the text, we're done */
|
|
|
|
if (scriptLimit >= textLength) {
|
2011-10-17 18:59:28 +04:00
|
|
|
return false;
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2010-06-11 23:14:38 +04:00
|
|
|
SYNC_FIXUP();
|
2016-04-21 20:58:59 +03:00
|
|
|
scriptCode = Script::COMMON;
|
2020-05-19 17:30:20 +03:00
|
|
|
Script fallbackScript = Script::UNKNOWN;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2010-06-11 23:14:38 +04:00
|
|
|
for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t ch;
|
2016-04-21 20:58:59 +03:00
|
|
|
Script sc;
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t startOfChar = scriptLimit;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2010-06-11 23:14:38 +04:00
|
|
|
ch = textPtr[scriptLimit];
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2012-06-20 23:58:18 +04:00
|
|
|
/* decode UTF-16 (may be surrogate pair) */
|
|
|
|
if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t low = textPtr[scriptLimit + 1];
|
2012-06-20 23:58:18 +04:00
|
|
|
if (NS_IS_LOW_SURROGATE(low)) {
|
|
|
|
ch = SURROGATE_TO_UCS4(ch, low);
|
|
|
|
scriptLimit += 1;
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize gc to UNASSIGNED; we'll only set it to the true GC
|
2016-04-21 20:58:59 +03:00
|
|
|
// if the character has script=COMMON, otherwise we don't care.
|
2012-08-22 19:56:38 +04:00
|
|
|
uint8_t gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2016-04-21 20:58:59 +03:00
|
|
|
sc = GetScriptCode(ch);
|
|
|
|
if (sc == Script::COMMON) {
|
2018-11-30 13:46:48 +03:00
|
|
|
/*
|
2010-06-11 23:14:38 +04:00
|
|
|
* Paired character handling:
|
2018-11-30 13:46:48 +03:00
|
|
|
*
|
2010-06-11 23:14:38 +04:00
|
|
|
* if it's an open character, push it onto the stack.
|
|
|
|
* if it's a close character, find the matching open on the
|
|
|
|
* stack, and use that script code. Any non-matching open
|
2012-06-20 23:58:18 +04:00
|
|
|
* characters above it on the stack will be popped.
|
2018-11-30 13:46:48 +03:00
|
|
|
*
|
2010-06-11 23:14:38 +04:00
|
|
|
* We only do this if the script is COMMON; for chars with
|
2012-08-22 19:56:38 +04:00
|
|
|
* specific script assignments, we just use them as-is.
|
2018-11-30 13:46:48 +03:00
|
|
|
*/
|
2016-01-20 18:17:14 +03:00
|
|
|
gc = GetGeneralCategory(ch);
|
2012-06-20 23:58:18 +04:00
|
|
|
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t endPairChar = mozilla::unicode::GetMirroredChar(ch);
|
2012-06-20 23:58:18 +04:00
|
|
|
if (endPairChar != ch) {
|
|
|
|
push(endPairChar, scriptCode);
|
|
|
|
}
|
2012-08-22 19:56:38 +04:00
|
|
|
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
|
|
|
HasMirroredChar(ch)) {
|
2012-06-20 23:58:18 +04:00
|
|
|
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
|
|
|
|
pop();
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|
|
|
|
|
2017-01-18 23:38:05 +03:00
|
|
|
if (STACK_IS_NOT_EMPTY()) {
|
2010-06-11 23:14:38 +04:00
|
|
|
sc = TOP().scriptCode;
|
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-01-18 23:38:05 +03:00
|
|
|
if (SameScript(scriptCode, sc, ch)) {
|
2020-05-19 17:30:20 +03:00
|
|
|
if (scriptCode == Script::COMMON) {
|
|
|
|
// If we have not yet resolved a specific scriptCode for the run,
|
|
|
|
// check whether this character provides it.
|
|
|
|
if (!CanMergeWithContext(sc)) {
|
|
|
|
// Use this character's script.
|
|
|
|
scriptCode = sc;
|
|
|
|
fixup(scriptCode);
|
|
|
|
} else if (fallbackScript == Script::UNKNOWN) {
|
|
|
|
// See if the character has a ScriptExtensions property we can
|
|
|
|
// store for use in the event the run remains unresolved.
|
|
|
|
UErrorCode error = U_ZERO_ERROR;
|
|
|
|
UScriptCode extension;
|
|
|
|
int32_t n = uscript_getScriptExtensions(ch, &extension, 1, &error);
|
|
|
|
if (error == U_BUFFER_OVERFLOW_ERROR && n > 0) {
|
|
|
|
fallbackScript = Script(extension);
|
|
|
|
}
|
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-06-11 23:14:38 +04:00
|
|
|
* if this character is a close paired character,
|
|
|
|
* pop the matching open character from the stack
|
2018-11-30 13:46:48 +03:00
|
|
|
*/
|
2012-06-20 23:58:18 +04:00
|
|
|
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
|
|
|
|
HasMirroredChar(ch)) {
|
2018-11-30 13:46:48 +03:00
|
|
|
pop();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
2010-06-11 23:14:38 +04:00
|
|
|
* reset scriptLimit in case it was advanced during reading a
|
|
|
|
* multiple-code-unit character
|
2018-11-30 13:46:48 +03:00
|
|
|
*/
|
2010-06-11 23:14:38 +04:00
|
|
|
scriptLimit = startOfChar;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
|
|
|
break;
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
2010-06-11 23:14:38 +04:00
|
|
|
|
|
|
|
aRunStart = scriptStart;
|
|
|
|
aRunLimit = scriptLimit;
|
2020-05-19 17:30:20 +03:00
|
|
|
|
|
|
|
if (scriptCode == Script::COMMON && fallbackScript != Script::UNKNOWN) {
|
|
|
|
aRunScript = fallbackScript;
|
|
|
|
} else {
|
|
|
|
aRunScript = scriptCode;
|
|
|
|
}
|
2010-06-11 23:14:38 +04:00
|
|
|
|
2011-10-17 18:59:28 +04:00
|
|
|
return true;
|
2010-06-11 23:14:38 +04:00
|
|
|
}
|