Bug 1495571 - Part 7: Make the output a lambda. r=efaust

Depends on D7375

Differential Revision: https://phabricator.services.mozilla.com/D7376

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Jason Orendorff 2018-10-02 14:26:01 +00:00
Родитель 0d6301a6d9
Коммит 4d45a905c2
1 изменённых файлов: 31 добавлений и 48 удалений

Просмотреть файл

@ -280,20 +280,15 @@ static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
// If making changes to this algorithm, make sure to also update
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
//
// Scan UTF8 input and (internally, at least) convert it to a series of
// UTF-16 code units. But you can also do odd things like pass
// CharT=Latin1Char, in which case each output code unit is silently truncated
// to 8 bits; or Action=Count, in which case the output is discarded entirely
// because we're just counting how many UTF-16 code units of output there are.
template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharT>
// Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
// code units. But you can also do odd things like pass an empty lambda for
// `dst`, in which case the output is discarded entirely--the only effect of
// calling the template that way is error-checking.
template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename OutputFn>
static bool
InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst,
JS::SmallestEncoding *smallestEncoding)
{
static_assert(std::is_same<CharT, char16_t>::value ||
std::is_same<CharT, Latin1Char>::value,
"bad CharT");
if (Action != Nop) {
*smallestEncoding = JS::SmallestEncoding::ASCII;
}
@ -304,17 +299,12 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
*smallestEncoding = JS::SmallestEncoding::UTF16;
};
// Count how many code units need to be in the inflated string.
// |i| is the index into |src|, and |j| is the the index into |dst|.
size_t srclen = src.length();
uint32_t j = 0;
for (uint32_t i = 0; i < srclen; i++, j++) {
for (uint32_t i = 0; i < srclen; i++) {
uint32_t v = uint32_t(src[i]);
if (!(v & 0x80)) {
// ASCII code unit. Simple copy.
if (Action == Copy) {
dst[j] = CharT(v);
}
dst(uint16_t(v));
} else {
// Non-ASCII code unit. Determine its length in bytes (n).
@ -338,9 +328,7 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
replacement = '?'; \
} \
if (Action == Copy) { \
dst[j] = CharT(replacement); \
} \
dst(replacement); \
n = n2; \
goto invalidMultiByteCodeUnit; \
} \
@ -379,7 +367,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
if (v > 0xff) {
RequireUTF16();
if (Action == FindEncoding) {
MOZ_ASSERT(dst == nullptr);
return true;
}
} else {
@ -388,21 +375,13 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
}
if (v < 0x10000) {
// The n-byte UTF8 code unit will fit in a single CharT.
if (Action == Copy) {
dst[j] = CharT(v);
}
dst(char16_t(v));
} else {
v -= 0x10000;
if (v <= 0xFFFFF) {
// The n-byte UTF8 code unit will fit in two CharT units.
if (Action == Copy) {
dst[j] = CharT((v >> 10) + 0xD800);
}
j++;
if (Action == Copy) {
dst[j] = CharT((v & 0x3FF) + 0xDC00);
}
dst(char16_t((v >> 10) + 0xD800));
dst(char16_t((v & 0x3FF) + 0xDC00));
} else {
// The n-byte UTF8 code unit won't fit in two CharT units.
INVALID(ReportTooBigCharacter, v, 1);
@ -420,10 +399,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
}
}
if (Action != Nop && Action != FindEncoding) {
*dstlenp = j;
}
return true;
}
@ -432,12 +407,19 @@ static CharsT
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
{
using CharT = typename CharsT::CharT;
static_assert(std::is_same<CharT, char16_t>::value ||
std::is_same<CharT, Latin1Char>::value,
"bad CharT");
*outlen = 0;
JS::SmallestEncoding encoding;
if (!InflateUTF8ToUTF16<Count, ErrorAction, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) {
size_t len = 0;
auto count = [&](char16_t) { len++; };
if (!InflateUTF8ToUTF16<Count, ErrorAction>(cx, src, count, &encoding)) {
return CharsT();
}
*outlen = len;
CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1); // +1 for NUL
if (!dst) {
@ -451,12 +433,15 @@ InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
for (uint32_t i = 0; i < srclen; i++) {
dst[i] = CharT(src[i]);
}
} else if (std::is_same<decltype(dst[0]), Latin1Char>::value) {
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertQuestionMark, CharT>(cx, src, dst, outlen, &encoding)));
} else {
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding)));
constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
? OnUTF8Error::InsertQuestionMark
: OnUTF8Error::InsertReplacementCharacter;
size_t j = 0;
auto push = [&](char16_t c) { dst[j++] = CharT(c); };
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, errorMode>(cx, src, push, &encoding)));
MOZ_ASSERT(j == len);
}
dst[*outlen] = 0; // NUL char
return CharsT(dst, *outlen);
@ -492,11 +477,10 @@ JS::SmallestEncoding
JS::FindSmallestEncoding(UTF8Chars utf8)
{
JS::SmallestEncoding encoding;
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter, char16_t>(
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter>(
/* cx = */ nullptr,
utf8,
/* dst = */ nullptr,
/* dstlen = */ nullptr,
[](char16_t) {},
&encoding)));
return encoding;
}
@ -519,11 +503,10 @@ JS::ConstUTF8CharsZ::validate(size_t aLength)
{
MOZ_ASSERT(data_);
UTF8Chars chars(data_, aLength);
InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash, char16_t>(
InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash>(
/* cx = */ nullptr,
chars,
/* dst = */ nullptr,
/* dstlen = */ nullptr,
[](char16_t) {},
/* smallestEncoding = */ nullptr);
}
#endif