Bug 1824671 - patch 3 - Convert intl Bidi component to be backed by the unicode-bidi crate. r=platform-i18n-reviewers,dminor

With this, the intl::Bidi component no longer relies on ICU4C's ubidi_* APIs.

Differential Revision: https://phabricator.services.mozilla.com/D197890
This commit is contained in:
Jonathan Kew 2024-02-06 11:41:09 +00:00
Родитель 7731a2b908
Коммит fafbd7f128
10 изменённых файлов: 127 добавлений и 24 удалений

9
Cargo.lock сгенерированный
Просмотреть файл

@ -2242,6 +2242,7 @@ dependencies = [
"unic-langid",
"unic-langid-ffi",
"unicode-bidi",
"unicode-bidi-ffi",
"uniffi",
"uniffi-example-arithmetic",
"uniffi-example-custom-types",
@ -3008,6 +3009,7 @@ dependencies = [
"icu_capi",
"mozglue-static",
"smoosh",
"unicode-bidi-ffi",
]
[[package]]
@ -5806,6 +5808,13 @@ version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
[[package]]
name = "unicode-bidi-ffi"
version = "0.1.0"
dependencies = [
"unicode-bidi",
]
[[package]]
name = "unicode-ident"
version = "1.0.6"

Просмотреть файл

@ -219,9 +219,9 @@ PositionAlignSetting TextTrackCue::ComputedPositionAlign() {
}
bool TextTrackCue::IsTextBaseDirectionLTR() const {
// The returned result by `ubidi_getBaseDirection` might be `neutral` if the
// text only contains netural charaters. In this case, we would treat its
// base direction as LTR.
// The result returned by `GetBaseDirection` might be `neutral` if the text
// only contains neutral characters. In this case, we would treat its base
// direction as LTR.
return intl::Bidi::GetBaseDirection(mText) != intl::Bidi::BaseDirection::RTL;
}

Просмотреть файл

@ -6,15 +6,30 @@
#include "mozilla/Casting.h"
#include "mozilla/intl/ICU4CGlue.h"
#include "unicode/ubidi.h"
#if !USE_RUST_UNICODE_BIDI
# include "unicode/ubidi.h"
#endif
namespace mozilla::intl {
#if USE_RUST_UNICODE_BIDI
using namespace ffi;
Bidi::Bidi() = default;
Bidi::~Bidi() = default;
#else
Bidi::Bidi() { mBidi = ubidi_open(); }
Bidi::~Bidi() { ubidi_close(mBidi.GetMut()); }
#endif
ICUResult Bidi::SetParagraph(Span<const char16_t> aParagraph,
BidiEmbeddingLevel aLevel) {
#if USE_RUST_UNICODE_BIDI
const auto* text = reinterpret_cast<const uint16_t*>(aParagraph.Elements());
mBidi.reset(bidi_new(text, aParagraph.Length(), aLevel));
return ToICUResult(U_ZERO_ERROR);
#else
// Do not allow any reordering of the runs, as this can change the
// performance characteristics of working with runs. In the default mode,
// the levels can be iterated over directly, rather than relying on computing
@ -35,9 +50,24 @@ ICUResult Bidi::SetParagraph(Span<const char16_t> aParagraph,
mLevels = nullptr;
return ToICUResult(status);
#endif
}
Bidi::ParagraphDirection Bidi::GetParagraphDirection() const {
#if USE_RUST_UNICODE_BIDI
auto dir = bidi_get_direction(mBidi.get());
switch (dir) {
case -1:
return Bidi::ParagraphDirection::RTL;
case 0:
return Bidi::ParagraphDirection::Mixed;
case 1:
return Bidi::ParagraphDirection::LTR;
default:
MOZ_ASSERT_UNREACHABLE("Bad direction value");
return Bidi::ParagraphDirection::Mixed;
}
#else
switch (ubidi_getDirection(mBidi.GetConst())) {
case UBIDI_LTR:
return Bidi::ParagraphDirection::LTR;
@ -51,20 +81,39 @@ Bidi::ParagraphDirection Bidi::GetParagraphDirection() const {
MOZ_ASSERT_UNREACHABLE("Unexpected UBiDiDirection value.");
};
return Bidi::ParagraphDirection::Mixed;
#endif
}
/* static */
// Forwards the embedding levels (viewed as raw bytes), their count, and the
// caller's index-map buffer to the active backend's visual-reordering routine.
void Bidi::ReorderVisual(const BidiEmbeddingLevel* aLevels, int32_t aLength,
int32_t* aIndexMap) {
#if USE_RUST_UNICODE_BIDI
// Backend selected by USE_RUST_UNICODE_BIDI: the unicode-bidi FFI entry point.
bidi_reorder_visual(reinterpret_cast<const uint8_t*>(aLevels), aLength,
aIndexMap);
#else
// Fallback backend: ICU4C's ubidi API, called with the same arguments.
ubidi_reorderVisual(reinterpret_cast<const uint8_t*>(aLevels), aLength,
aIndexMap);
#endif
}
/* static */
Bidi::BaseDirection Bidi::GetBaseDirection(Span<const char16_t> aParagraph) {
Bidi::BaseDirection Bidi::GetBaseDirection(Span<const char16_t> aText) {
#if USE_RUST_UNICODE_BIDI
const auto* text = reinterpret_cast<const uint16_t*>(aText.Elements());
switch (bidi_get_base_direction(text, aText.Length(), false)) {
case -1:
return Bidi::BaseDirection::RTL;
case 0:
return Bidi::BaseDirection::Neutral;
case 1:
return Bidi::BaseDirection::LTR;
default:
MOZ_ASSERT_UNREACHABLE("Bad base direction value");
return Bidi::BaseDirection::Neutral;
}
#else
UBiDiDirection direction = ubidi_getBaseDirection(
aParagraph.Elements(), AssertedCast<int32_t>(aParagraph.Length()));
aText.Elements(), AssertedCast<int32_t>(aText.Length()));
switch (direction) {
case UBIDI_LTR:
return Bidi::BaseDirection::LTR;
@ -75,10 +124,11 @@ Bidi::BaseDirection Bidi::GetBaseDirection(Span<const char16_t> aParagraph) {
case UBIDI_MIXED:
MOZ_ASSERT_UNREACHABLE("Unexpected UBiDiDirection value.");
}
return Bidi::BaseDirection::Neutral;
#endif
}
#if !USE_RUST_UNICODE_BIDI
static BidiDirection ToBidiDirection(UBiDiDirection aDirection) {
switch (aDirection) {
case UBIDI_LTR:
@ -91,8 +141,12 @@ static BidiDirection ToBidiDirection(UBiDiDirection aDirection) {
}
return BidiDirection::LTR;
}
#endif
Result<int32_t, ICUError> Bidi::CountRuns() {
#if USE_RUST_UNICODE_BIDI
return bidi_count_runs(mBidi.get());
#else
UErrorCode status = U_ZERO_ERROR;
int32_t runCount = ubidi_countRuns(mBidi.GetMut(), &status);
if (U_FAILURE(status)) {
@ -108,31 +162,51 @@ Result<int32_t, ICUError> Bidi::CountRuns() {
}
return runCount;
#endif
}
void Bidi::GetLogicalRun(int32_t aLogicalStart, int32_t* aLogicalLimitOut,
BidiEmbeddingLevel* aLevelOut) {
#if USE_RUST_UNICODE_BIDI
const int32_t length = bidi_get_length(mBidi.get());
MOZ_DIAGNOSTIC_ASSERT(aLogicalStart < length);
const auto* levels = bidi_get_levels(mBidi.get());
#else
MOZ_ASSERT(mLevels, "CountRuns hasn't been run?");
MOZ_RELEASE_ASSERT(aLogicalStart < mLength, "Out of bound");
BidiEmbeddingLevel level = mLevels[aLogicalStart];
const int32_t length = mLength;
const auto* levels = mLevels;
#endif
const uint8_t level = levels[aLogicalStart];
int32_t limit;
for (limit = aLogicalStart + 1; limit < mLength; limit++) {
if (mLevels[limit] != level) {
for (limit = aLogicalStart + 1; limit < length; limit++) {
if (levels[limit] != level) {
break;
}
}
*aLogicalLimitOut = limit;
*aLevelOut = level;
*aLevelOut = BidiEmbeddingLevel(level);
}
// Returns the paragraph level reported by the active backend, wrapped in a
// BidiEmbeddingLevel.
BidiEmbeddingLevel Bidi::GetParagraphEmbeddingLevel() const {
#if USE_RUST_UNICODE_BIDI
return BidiEmbeddingLevel(bidi_get_paragraph_level(mBidi.get()));
#else
return BidiEmbeddingLevel(ubidi_getParaLevel(mBidi.GetConst()));
#endif
}
// Looks up the run at index aRunIndex in the active backend, reports its
// logical start and length through the out-parameters, and returns its
// direction.
BidiDirection Bidi::GetVisualRun(int32_t aRunIndex, int32_t* aLogicalStart,
int32_t* aLength) {
#if USE_RUST_UNICODE_BIDI
auto run = bidi_get_visual_run(mBidi.get(), aRunIndex);
*aLogicalStart = run.start;
*aLength = run.length;
// The FFI run exposes a level rather than a direction, so the direction is
// derived from that level.
return BidiEmbeddingLevel(run.level).Direction();
#else
// The out-parameters are passed straight through to ICU; only the returned
// UBiDiDirection is converted to BidiDirection here.
return ToBidiDirection(
ubidi_getVisualRun(mBidi.GetMut(), aRunIndex, aLogicalStart, aLength));
#endif
}
} // namespace mozilla::intl

Просмотреть файл

@ -7,7 +7,13 @@
#include "mozilla/intl/BidiEmbeddingLevel.h"
#include "mozilla/intl/ICU4CGlue.h"
#define USE_RUST_UNICODE_BIDI 1
#if USE_RUST_UNICODE_BIDI
# include "mozilla/intl/unicode_bidi_ffi_generated.h"
#else
struct UBiDi;
#endif
namespace mozilla::intl {
@ -116,9 +122,9 @@ class Bidi final {
enum class BaseDirection { LTR, RTL, Neutral };
/**
* Get the base direction of the paragraph.
* Get the base direction of the text.
*/
static BaseDirection GetBaseDirection(Span<const char16_t> aParagraph);
static BaseDirection GetBaseDirection(Span<const char16_t> aText);
/**
* Get one run's logical start, length, and directionality. In an RTL run, the
@ -142,6 +148,15 @@ class Bidi final {
int32_t* aLength);
private:
#if USE_RUST_UNICODE_BIDI
using UnicodeBidi = mozilla::intl::ffi::UnicodeBidi;
// Deleter type for the mBidi UniquePtr: casts the pointer back to UnicodeBidi
// and hands it to bidi_destroy.
struct BidiFreePolicy {
void operator()(void* aPtr) {
bidi_destroy(static_cast<UnicodeBidi*>(aPtr));
}
};
mozilla::UniquePtr<UnicodeBidi, BidiFreePolicy> mBidi;
#else
ICUPointer<UBiDi> mBidi = ICUPointer<UBiDi>(nullptr);
/**
@ -154,6 +169,7 @@ class Bidi final {
* The length of the paragraph from `Bidi::SetParagraph`.
*/
int32_t mLength = 0;
#endif
};
} // namespace mozilla::intl

Просмотреть файл

@ -9,19 +9,17 @@ TEST_DIRS += [
]
DIRS += [
"bidi",
"build",
"components",
"hyphenation/glue",
"l10n",
"locale",
"locales",
"lwbrk",
"strres",
"unicharutil",
"l10n",
]
DIRS += [
"uconv",
"build",
"unicharutil",
]
EXPORTS.mozilla += [

Просмотреть файл

@ -16,6 +16,7 @@ smoosh = { path = "../../frontend/smoosh", optional = true }
mozglue-static = { path = "../../../../mozglue/static/rust" }
gluesmith = { path = "../../fuzz-tests/gluesmith", optional = true }
icu_capi = { version = "1.4.0", optional = true, default-features= false, features = ["any_provider", "compiled_data", "icu_segmenter"] }
unicode-bidi-ffi = { path = "../../../../intl/bidi/rust/unicode-bidi-ffi" }
[features]
simd-accel = ['encoding_c/simd-accel']

Просмотреть файл

@ -15,6 +15,7 @@
extern crate encoding_c;
extern crate encoding_c_mem;
extern crate mozglue_static;
extern crate unicode_bidi_ffi;
#[cfg(feature = "smoosh")]
extern crate smoosh;

Просмотреть файл

@ -997,10 +997,12 @@ nsresult nsBidiPresUtils::ResolveParagraph(BidiParagraphData* aBpd) {
if (++numRun >= runCount) {
// We've run out of runs of text; but don't forget to store bidi data
// to the frame before breaking out of the loop (bug 1426042).
storeBidiDataToFrame();
if (isTextFrame) {
frame->AdjustOffsetsForBidi(contentOffset,
contentOffset + fragmentLength);
if (frame != NS_BIDI_CONTROL_FRAME) {
storeBidiDataToFrame();
if (isTextFrame) {
frame->AdjustOffsetsForBidi(contentOffset,
contentOffset + fragmentLength);
}
}
break;
}

Просмотреть файл

@ -59,6 +59,7 @@ fluent-langneg-ffi = { path = "../../../../intl/locale/rust/fluent-langneg-ffi"
oxilangtag = "0.1.3"
oxilangtag-ffi = { path = "../../../../intl/locale/rust/oxilangtag-ffi" }
unicode-bidi = "0.3.15"
unicode-bidi-ffi = { path = "../../../../intl/bidi/rust/unicode-bidi-ffi" }
rure = "0.2.2"
rust_minidump_writer_linux = { path = "../../../crashreporter/rust_minidump_writer_linux", optional = true }
mozannotation_client = { path = "../../../crashreporter/mozannotation_client", optional = true }

Просмотреть файл

@ -88,6 +88,7 @@ extern crate fluent;
extern crate fluent_ffi;
extern crate oxilangtag_ffi;
extern crate unicode_bidi_ffi;
extern crate rure;