From 5bff5f16b27d94f381f3f802b8c0ec332df1b8d9 Mon Sep 17 00:00:00 2001 From: Greg Tatum Date: Tue, 30 Nov 2021 19:05:57 +0000 Subject: [PATCH] Bug 1719735 - Add a UTF-16 span overload for FillBuffer; r=platform-i18n-reviewers,dminor Differential Revision: https://phabricator.services.mozilla.com/D130798 --- intl/components/src/ICU4CGlue.h | 66 +++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/intl/components/src/ICU4CGlue.h b/intl/components/src/ICU4CGlue.h index 06742c67eec3..b12bae7c0aff 100644 --- a/intl/components/src/ICU4CGlue.h +++ b/intl/components/src/ICU4CGlue.h @@ -290,8 +290,9 @@ static ICUResult FillBufferWithICUCall(AutoTArray& array, #endif /** - * ICU4C works with UTF-16 strings, but consumers of mozilla::intl may require - * UTF-8 strings. + * Fill a UTF-8 or a UTF-16 buffer with a UTF-16 span. ICU4C mostly uses UTF-16 + * internally, but different consumers may have different situations with their + * buffers. */ template [[nodiscard]] bool FillBuffer(Span utf16Span, @@ -331,25 +332,74 @@ template return true; } +/** + * Fill a UTF-8 or a UTF-16 buffer with a UTF-8 span. UTF-8 targets receive the + * bytes as-is; UTF-16 targets are transcoded with ConvertUtf8toUtf16. Returns + * false if the target buffer cannot reserve enough capacity.
+ */ +template +[[nodiscard]] bool FillBuffer(Span utf8Span, Buffer& targetBuffer) { + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v); + + if constexpr (std::is_same_v || + std::is_same_v) { + size_t amount = utf8Span.Length(); + if (!targetBuffer.reserve(amount)) { + return false; + } + for (size_t i = 0; i < amount; i++) { + targetBuffer.data()[i] = + // Static cast in case of a mismatch between `unsigned char` and + // `char` + static_cast(utf8Span[i]); + } + targetBuffer.written(amount); + } + if constexpr (std::is_same_v) { + if (!targetBuffer.reserve(utf8Span.Length() + 1)) { + return false; + } + + size_t amount = ConvertUtf8toUtf16( + utf8Span, Span(targetBuffer.data(), targetBuffer.capacity())); + + targetBuffer.written(amount); + } + + return true; +} + /** * It is convenient for callers to be able to pass in UTF-8 strings to the API. * This function can be used to convert that to a stack-allocated UTF-16 - * mozilla::Vector that can then be passed into ICU calls. + * mozilla::Vector that can then be passed into ICU calls. The string will be + * null terminated, with the terminator excluded from the vector's length. */ template [[nodiscard]] static bool FillUTF16Vector( Span utf8Span, mozilla::Vector& utf16TargetVec) { // Per ConvertUtf8toUtf16: The length of aDest must be at least one greater - // than the length of aSource + // than the length of aSource. This additional length will be used for null + // termination. if (!utf16TargetVec.reserve(utf8Span.Length() + 1)) { return false; } + // ConvertUtf8toUtf16 fills the buffer with the data, but the length of the - // vector is unchanged. The call to resizeUninitialized notifies the vector of - // how much was written. - return utf16TargetVec.resizeUninitialized(ConvertUtf8toUtf16( - utf8Span, Span(utf16TargetVec.begin(), utf16TargetVec.capacity()))); + // vector is unchanged.
+ size_t length = ConvertUtf8toUtf16( + utf8Span, Span(utf16TargetVec.begin(), utf16TargetVec.capacity())); + + // Assert that the last element is free for writing a null terminator. + MOZ_ASSERT(length < utf16TargetVec.capacity()); + utf16TargetVec.begin()[length] = '\0'; + + // The call to resizeUninitialized notifies the vector of how much was written + // exclusive of the null terminator. + return utf16TargetVec.resizeUninitialized(length); } /**