зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1495571 - Part 7: Make the output a lambda. r=efaust
Depends on D7375. Differential Revision: https://phabricator.services.mozilla.com/D7376 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Родитель
0d6301a6d9
Коммит
4d45a905c2
|
@ -280,20 +280,15 @@ static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
|
||||||
// If making changes to this algorithm, make sure to also update
|
// If making changes to this algorithm, make sure to also update
|
||||||
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
|
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
|
||||||
//
|
//
|
||||||
// Scan UTF8 input and (internally, at least) convert it to a series of
|
// Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
|
||||||
// UTF-16 code units. But you can also do odd things like pass
|
// code units. But you can also do odd things like pass an empty lambda for
|
||||||
// CharT=Latin1Char, in which case each output code unit is silently truncated
|
// `dst`, in which case the output is discarded entirely--the only effect of
|
||||||
// to 8 bits; or Action=Count, in which case the output is discarded entirely
|
// calling the template that way is error-checking.
|
||||||
// because we're just counting how many UTF-16 code units of output there are.
|
template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename OutputFn>
|
||||||
template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharT>
|
|
||||||
static bool
|
static bool
|
||||||
InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
|
InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst,
|
||||||
JS::SmallestEncoding *smallestEncoding)
|
JS::SmallestEncoding *smallestEncoding)
|
||||||
{
|
{
|
||||||
static_assert(std::is_same<CharT, char16_t>::value ||
|
|
||||||
std::is_same<CharT, Latin1Char>::value,
|
|
||||||
"bad CharT");
|
|
||||||
|
|
||||||
if (Action != Nop) {
|
if (Action != Nop) {
|
||||||
*smallestEncoding = JS::SmallestEncoding::ASCII;
|
*smallestEncoding = JS::SmallestEncoding::ASCII;
|
||||||
}
|
}
|
||||||
|
@ -304,17 +299,12 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
|
||||||
*smallestEncoding = JS::SmallestEncoding::UTF16;
|
*smallestEncoding = JS::SmallestEncoding::UTF16;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Count how many code units need to be in the inflated string.
|
|
||||||
// |i| is the index into |src|, and |j| is the index into |dst|.
|
|
||||||
size_t srclen = src.length();
|
size_t srclen = src.length();
|
||||||
uint32_t j = 0;
|
for (uint32_t i = 0; i < srclen; i++) {
|
||||||
for (uint32_t i = 0; i < srclen; i++, j++) {
|
|
||||||
uint32_t v = uint32_t(src[i]);
|
uint32_t v = uint32_t(src[i]);
|
||||||
if (!(v & 0x80)) {
|
if (!(v & 0x80)) {
|
||||||
// ASCII code unit. Simple copy.
|
// ASCII code unit. Simple copy.
|
||||||
if (Action == Copy) {
|
dst(uint16_t(v));
|
||||||
dst[j] = CharT(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Non-ASCII code unit. Determine its length in bytes (n).
|
// Non-ASCII code unit. Determine its length in bytes (n).
|
||||||
|
@ -338,9 +328,7 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
|
||||||
MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
|
MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
|
||||||
replacement = '?'; \
|
replacement = '?'; \
|
||||||
} \
|
} \
|
||||||
if (Action == Copy) { \
|
dst(replacement); \
|
||||||
dst[j] = CharT(replacement); \
|
|
||||||
} \
|
|
||||||
n = n2; \
|
n = n2; \
|
||||||
goto invalidMultiByteCodeUnit; \
|
goto invalidMultiByteCodeUnit; \
|
||||||
} \
|
} \
|
||||||
|
@ -379,7 +367,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
|
||||||
if (v > 0xff) {
|
if (v > 0xff) {
|
||||||
RequireUTF16();
|
RequireUTF16();
|
||||||
if (Action == FindEncoding) {
|
if (Action == FindEncoding) {
|
||||||
MOZ_ASSERT(dst == nullptr);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -388,21 +375,13 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
|
||||||
}
|
}
|
||||||
if (v < 0x10000) {
|
if (v < 0x10000) {
|
||||||
// The n-byte UTF8 code unit will fit in a single CharT.
|
// The n-byte UTF8 code unit will fit in a single CharT.
|
||||||
if (Action == Copy) {
|
dst(char16_t(v));
|
||||||
dst[j] = CharT(v);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
v -= 0x10000;
|
v -= 0x10000;
|
||||||
if (v <= 0xFFFFF) {
|
if (v <= 0xFFFFF) {
|
||||||
// The n-byte UTF8 code unit will fit in two CharT units.
|
// The n-byte UTF8 code unit will fit in two CharT units.
|
||||||
if (Action == Copy) {
|
dst(char16_t((v >> 10) + 0xD800));
|
||||||
dst[j] = CharT((v >> 10) + 0xD800);
|
dst(char16_t((v & 0x3FF) + 0xDC00));
|
||||||
}
|
|
||||||
j++;
|
|
||||||
if (Action == Copy) {
|
|
||||||
dst[j] = CharT((v & 0x3FF) + 0xDC00);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// The n-byte UTF8 code unit won't fit in two CharT units.
|
// The n-byte UTF8 code unit won't fit in two CharT units.
|
||||||
INVALID(ReportTooBigCharacter, v, 1);
|
INVALID(ReportTooBigCharacter, v, 1);
|
||||||
|
@ -420,10 +399,6 @@ InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstle
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Action != Nop && Action != FindEncoding) {
|
|
||||||
*dstlenp = j;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -432,12 +407,19 @@ static CharsT
|
||||||
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
||||||
{
|
{
|
||||||
using CharT = typename CharsT::CharT;
|
using CharT = typename CharsT::CharT;
|
||||||
|
static_assert(std::is_same<CharT, char16_t>::value ||
|
||||||
|
std::is_same<CharT, Latin1Char>::value,
|
||||||
|
"bad CharT");
|
||||||
|
|
||||||
*outlen = 0;
|
*outlen = 0;
|
||||||
|
|
||||||
JS::SmallestEncoding encoding;
|
JS::SmallestEncoding encoding;
|
||||||
if (!InflateUTF8ToUTF16<Count, ErrorAction, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) {
|
size_t len = 0;
|
||||||
|
auto count = [&](char16_t) { len++; };
|
||||||
|
if (!InflateUTF8ToUTF16<Count, ErrorAction>(cx, src, count, &encoding)) {
|
||||||
return CharsT();
|
return CharsT();
|
||||||
}
|
}
|
||||||
|
*outlen = len;
|
||||||
|
|
||||||
CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1); // +1 for NUL
|
CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1); // +1 for NUL
|
||||||
if (!dst) {
|
if (!dst) {
|
||||||
|
@ -451,12 +433,15 @@ InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
||||||
for (uint32_t i = 0; i < srclen; i++) {
|
for (uint32_t i = 0; i < srclen; i++) {
|
||||||
dst[i] = CharT(src[i]);
|
dst[i] = CharT(src[i]);
|
||||||
}
|
}
|
||||||
} else if (std::is_same<decltype(dst[0]), Latin1Char>::value) {
|
|
||||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertQuestionMark, CharT>(cx, src, dst, outlen, &encoding)));
|
|
||||||
} else {
|
} else {
|
||||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding)));
|
constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
|
||||||
|
? OnUTF8Error::InsertQuestionMark
|
||||||
|
: OnUTF8Error::InsertReplacementCharacter;
|
||||||
|
size_t j = 0;
|
||||||
|
auto push = [&](char16_t c) { dst[j++] = CharT(c); };
|
||||||
|
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, errorMode>(cx, src, push, &encoding)));
|
||||||
|
MOZ_ASSERT(j == len);
|
||||||
}
|
}
|
||||||
|
|
||||||
dst[*outlen] = 0; // NUL char
|
dst[*outlen] = 0; // NUL char
|
||||||
|
|
||||||
return CharsT(dst, *outlen);
|
return CharsT(dst, *outlen);
|
||||||
|
@ -492,11 +477,10 @@ JS::SmallestEncoding
|
||||||
JS::FindSmallestEncoding(UTF8Chars utf8)
|
JS::FindSmallestEncoding(UTF8Chars utf8)
|
||||||
{
|
{
|
||||||
JS::SmallestEncoding encoding;
|
JS::SmallestEncoding encoding;
|
||||||
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter, char16_t>(
|
MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter>(
|
||||||
/* cx = */ nullptr,
|
/* cx = */ nullptr,
|
||||||
utf8,
|
utf8,
|
||||||
/* dst = */ nullptr,
|
[](char16_t) {},
|
||||||
/* dstlen = */ nullptr,
|
|
||||||
&encoding)));
|
&encoding)));
|
||||||
return encoding;
|
return encoding;
|
||||||
}
|
}
|
||||||
|
@ -519,11 +503,10 @@ JS::ConstUTF8CharsZ::validate(size_t aLength)
|
||||||
{
|
{
|
||||||
MOZ_ASSERT(data_);
|
MOZ_ASSERT(data_);
|
||||||
UTF8Chars chars(data_, aLength);
|
UTF8Chars chars(data_, aLength);
|
||||||
InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash, char16_t>(
|
InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash>(
|
||||||
/* cx = */ nullptr,
|
/* cx = */ nullptr,
|
||||||
chars,
|
chars,
|
||||||
/* dst = */ nullptr,
|
[](char16_t) {},
|
||||||
/* dstlen = */ nullptr,
|
|
||||||
/* smallestEncoding = */ nullptr);
|
/* smallestEncoding = */ nullptr);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
Загрузка…
Ссылка в новой задаче