Bug 1495571 - Part 7: Make the output a lambda. r=efaust

Depends on D7375

Differential Revision: https://phabricator.services.mozilla.com/D7376

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Jason Orendorff 2018-10-02 14:26:01 +00:00
Родитель 0d6301a6d9
Коммит 4d45a905c2
1 изменённых файлов: 31 добавлений и 48 удалений

Просмотреть файл

@ -280,20 +280,15 @@ static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
// If making changes to this algorithm, make sure to also update // If making changes to this algorithm, make sure to also update
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
// //
// Scan UTF8 input and (internally, at least) convert it to a series of // Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
// UTF-16 code units. But you can also do odd things like pass // code units. But you can also do odd things like pass an empty lambda for
// CharT=Latin1Char, in which case each output code unit is silently truncated // `dst`, in which case the output is discarded entirely--the only effect of
// to 8 bits; or Action=Count, in which case the output is discarded entirely // calling the template that way is error-checking.
// because we're just counting how many UTF-16 code units of output there are. template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename OutputFn>
template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharT>
static bool static bool
InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp, InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst,
JS::SmallestEncoding *smallestEncoding) JS::SmallestEncoding *smallestEncoding)
{ {
static_assert(std::is_same<CharT, char16_t>::value ||
std::is_same<CharT, Latin1Char>::value,
"bad CharT");
if (Action != Nop) { if (Action != Nop) {
*smallestEncoding = JS::SmallestEncoding::ASCII; *smallestEncoding = JS::SmallestEncoding::ASCII;
} }
@ -304,17 +299,12 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
*smallestEncoding = JS::SmallestEncoding::UTF16; *smallestEncoding = JS::SmallestEncoding::UTF16;
}; };
// Count how many code units need to be in the inflated string.
// |i| is the index into |src|, and |j| is the index into |dst|.
size_t srclen = src.length(); size_t srclen = src.length();
uint32_t j = 0; for (uint32_t i = 0; i < srclen; i++) {
for (uint32_t i = 0; i < srclen; i++, j++) {
uint32_t v = uint32_t(src[i]); uint32_t v = uint32_t(src[i]);
if (!(v & 0x80)) { if (!(v & 0x80)) {
// ASCII code unit. Simple copy. // ASCII code unit. Simple copy.
if (Action == Copy) { dst(uint16_t(v));
dst[j] = CharT(v);
}
} else { } else {
// Non-ASCII code unit. Determine its length in bytes (n). // Non-ASCII code unit. Determine its length in bytes (n).
@ -338,9 +328,7 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \ MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
replacement = '?'; \ replacement = '?'; \
} \ } \
if (Action == Copy) { \ dst(replacement); \
dst[j] = CharT(replacement); \
} \
n = n2; \ n = n2; \
goto invalidMultiByteCodeUnit; \ goto invalidMultiByteCodeUnit; \
} \ } \
@ -379,7 +367,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
if (v > 0xff) { if (v > 0xff) {
RequireUTF16(); RequireUTF16();
if (Action == FindEncoding) { if (Action == FindEncoding) {
MOZ_ASSERT(dst == nullptr);
return true; return true;
} }
} else { } else {
@ -388,21 +375,13 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
} }
if (v < 0x10000) { if (v < 0x10000) {
// The n-byte UTF8 code unit will fit in a single CharT. // The n-byte UTF8 code unit will fit in a single CharT.
if (Action == Copy) { dst(char16_t(v));
dst[j] = CharT(v);
}
} else { } else {
v -= 0x10000; v -= 0x10000;
if (v <= 0xFFFFF) { if (v <= 0xFFFFF) {
// The n-byte UTF8 code unit will fit in two CharT units. // The n-byte UTF8 code unit will fit in two CharT units.
if (Action == Copy) { dst(char16_t((v >> 10) + 0xD800));
dst[j] = CharT((v >> 10) + 0xD800); dst(char16_t((v & 0x3FF) + 0xDC00));
}
j++;
if (Action == Copy) {
dst[j] = CharT((v & 0x3FF) + 0xDC00);
}
} else { } else {
// The n-byte UTF8 code unit won't fit in two CharT units. // The n-byte UTF8 code unit won't fit in two CharT units.
INVALID(ReportTooBigCharacter, v, 1); INVALID(ReportTooBigCharacter, v, 1);
@ -420,10 +399,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
} }
} }
if (Action != Nop && Action != FindEncoding) {
*dstlenp = j;
}
return true; return true;
} }
@ -432,12 +407,19 @@ static CharsT
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen) InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
{ {
using CharT = typename CharsT::CharT; using CharT = typename CharsT::CharT;
static_assert(std::is_same<CharT, char16_t>::value ||
std::is_same<CharT, Latin1Char>::value,
"bad CharT");
*outlen = 0; *outlen = 0;
JS::SmallestEncoding encoding; JS::SmallestEncoding encoding;
if (!InflateUTF8ToUTF16<Count, ErrorAction, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) { size_t len = 0;
auto count = [&](char16_t) { len++; };
if (!InflateUTF8ToUTF16<Count, ErrorAction>(cx, src, count, &encoding)) {
return CharsT(); return CharsT();
} }
*outlen = len;
CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1); // +1 for NUL CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1); // +1 for NUL
if (!dst) { if (!dst) {
@ -451,12 +433,15 @@ InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
for (uint32_t i = 0; i < srclen; i++) { for (uint32_t i = 0; i < srclen; i++) {
dst[i] = CharT(src[i]); dst[i] = CharT(src[i]);
} }
} else if (std::is_same<decltype(dst[0]), Latin1Char>::value) {
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertQuestionMark, CharT>(cx, src, dst, outlen, &encoding)));
} else { } else {
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding))); constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
? OnUTF8Error::InsertQuestionMark
: OnUTF8Error::InsertReplacementCharacter;
size_t j = 0;
auto push = [&](char16_t c) { dst[j++] = CharT(c); };
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, errorMode>(cx, src, push, &encoding)));
MOZ_ASSERT(j == len);
} }
dst[*outlen] = 0; // NUL char dst[*outlen] = 0; // NUL char
return CharsT(dst, *outlen); return CharsT(dst, *outlen);
@ -492,11 +477,10 @@ JS::SmallestEncoding
JS::FindSmallestEncoding(UTF8Chars utf8) JS::FindSmallestEncoding(UTF8Chars utf8)
{ {
JS::SmallestEncoding encoding; JS::SmallestEncoding encoding;
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter, char16_t>( MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter>(
/* cx = */ nullptr, /* cx = */ nullptr,
utf8, utf8,
/* dst = */ nullptr, [](char16_t) {},
/* dstlen = */ nullptr,
&encoding))); &encoding)));
return encoding; return encoding;
} }
@ -519,11 +503,10 @@ JS::ConstUTF8CharsZ::validate(size_t aLength)
{ {
MOZ_ASSERT(data_); MOZ_ASSERT(data_);
UTF8Chars chars(data_, aLength); UTF8Chars chars(data_, aLength);
InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash, char16_t>( InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash>(
/* cx = */ nullptr, /* cx = */ nullptr,
chars, chars,
/* dst = */ nullptr, [](char16_t) {},
/* dstlen = */ nullptr,
/* smallestEncoding = */ nullptr); /* smallestEncoding = */ nullptr);
} }
#endif #endif