Bug 1289003 - Part 1: Add UTF8CharsToNewLatin1CharsZ, LossyUTF8CharsToNewLatin1CharsZ. r=jwalden

This commit is contained in:
Tooru Fujisawa 2016-08-13 23:03:31 +09:00
Родитель 382083077f
Коммит 586c7b1a14
2 изменённых файлов: 83 добавлений и 31 удалений

Просмотреть файл

@ -31,6 +31,8 @@ class Latin1Chars : public mozilla::Range<Latin1Char>
typedef mozilla::Range<Latin1Char> Base;
public:
using CharT = Latin1Char;
Latin1Chars() : Base() {}
Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
Latin1Chars(const Latin1Char* aBytes, size_t aLength)
@ -49,6 +51,8 @@ class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
typedef mozilla::RangedPtr<Latin1Char> Base;
public:
using CharT = Latin1Char;
Latin1CharsZ() : Base(nullptr, 0) {}
Latin1CharsZ(char* aBytes, size_t aLength)
@ -73,6 +77,8 @@ class UTF8Chars : public mozilla::Range<unsigned char>
typedef mozilla::Range<unsigned char> Base;
public:
using CharT = unsigned char;
UTF8Chars() : Base() {}
UTF8Chars(char* aBytes, size_t aLength)
: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
@ -90,6 +96,8 @@ class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
typedef mozilla::RangedPtr<unsigned char> Base;
public:
using CharT = unsigned char;
UTF8CharsZ() : Base(nullptr, 0) {}
UTF8CharsZ(char* aBytes, size_t aLength)
@ -120,6 +128,8 @@ class ConstUTF8CharsZ
const char* data_;
public:
using CharT = unsigned char;
ConstUTF8CharsZ() : data_(nullptr)
{}
@ -157,6 +167,8 @@ class TwoByteChars : public mozilla::Range<char16_t>
typedef mozilla::Range<char16_t> Base;
public:
using CharT = char16_t;
TwoByteChars() : Base() {}
TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
@ -170,6 +182,8 @@ class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
typedef mozilla::RangedPtr<char16_t> Base;
public:
using CharT = char16_t;
TwoByteCharsZ() : Base(nullptr, 0) {}
TwoByteCharsZ(char16_t* chars, size_t length)
@ -191,6 +205,8 @@ class ConstTwoByteChars : public mozilla::Range<const char16_t>
typedef mozilla::Range<const char16_t> Base;
public:
using CharT = char16_t;
ConstTwoByteChars() : Base() {}
ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
};
@ -272,6 +288,23 @@ JS_PUBLIC_API(void)
DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);
/*
 * Convert the UTF-8 input |utf8| into a newly allocated, null-terminated
 * Latin-1 string, storing its length (excluding the null terminator) in
 * |*outlen|. |*outlen| is reset to 0 before the conversion starts.
 *
 * Fail and report an error if the string contains non-Latin-1 codepoints.
 * Returns Latin1CharsZ() on failure; an allocation failure is reported as
 * out-of-memory on |cx|.
 *
 * The buffer is obtained from cx->pod_malloc; presumably the caller is
 * expected to release it (e.g. via the JS_free(JS::Latin1CharsZ&) overload)
 * -- confirm against callers.
 *
 * NOTE(review): the visible conversion code stores every code point below
 * 0x10000 as CharT(v) with no Latin-1 range check -- confirm that code
 * points above 0xFF are really rejected rather than truncated.
 */
extern Latin1CharsZ
UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
/*
 * Convert the UTF-8 input |utf8| into a newly allocated, null-terminated
 * Latin-1 string, storing its length (excluding the null terminator) in
 * |*outlen|. |*outlen| is reset to 0 before the conversion starts.
 *
 * Non-Latin-1 codepoints are replaced by '?'. Returns Latin1CharsZ() on
 * failure (e.g. when the output buffer cannot be allocated, which is
 * reported as out-of-memory on |cx|).
 *
 * The buffer is obtained from cx->pod_malloc; presumably the caller is
 * expected to release it (e.g. via the JS_free(JS::Latin1CharsZ&) overload)
 * -- confirm against callers.
 *
 * NOTE(review): the '?' substitution lives in the invalid-sequence path of
 * the converter; well-formed code points below 0x10000 are stored as
 * CharT(v) in the visible code -- confirm that code points above 0xFF are
 * actually replaced rather than truncated.
 */
extern Latin1CharsZ
LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
} // namespace JS
// Release the character buffer referenced by |ptr| by handing its raw
// pointer to js_free().
inline void
JS_free(JS::Latin1CharsZ& ptr)
{
    void* rawBuffer = static_cast<void*>(ptr.get());
    js_free(rawBuffer);
}

Просмотреть файл

@ -8,6 +8,8 @@
#include "mozilla/Range.h"
#include <type_traits>
#include "jscntxt.h"
#include "jsprf.h"
@ -253,19 +255,20 @@ enum InflateUTF8Action {
Copy
};
static const uint32_t REPLACE_UTF8 = 0xFFFD;
static const char16_t REPLACE_UTF8 = 0xFFFD;
static const Latin1Char REPLACE_UTF8_LATIN1 = '?';
// If making changes to this algorithm, make sure to also update
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
template <InflateUTF8Action Action>
template <InflateUTF8Action Action, typename CharT>
static bool
InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, size_t* dstlenp,
InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
bool* isAsciip)
{
if (Action != AssertNoInvalids)
*isAsciip = true;
// Count how many char16_t characters need to be in the inflated string.
// Count how many code units need to be in the inflated string.
// |i| is the index into |src|, and |j| is the index into |dst|.
size_t srclen = src.length();
uint32_t j = 0;
@ -274,7 +277,7 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
if (!(v & 0x80)) {
// ASCII code unit. Simple copy.
if (Action == Copy)
dst[j] = char16_t(v);
dst[j] = CharT(v);
} else {
// Non-ASCII code unit. Determine its length in bytes (n).
@ -292,10 +295,14 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
} else if (Action == AssertNoInvalids) { \
MOZ_CRASH("invalid UTF-8 string: " # report); \
} else { \
if (Action == Copy) \
dst[j] = char16_t(REPLACE_UTF8); \
else \
if (Action == Copy) { \
    /* Pick the replacement unit for the output encoding. Compare CharT */ \
    /* itself: decltype(dst[0]) is the reference type CharT&, which is   */ \
    /* never the same type as Latin1Char, so testing it left the '?'     */ \
    /* branch unreachable and Latin-1 output received CharT(0xFFFD).     */ \
    if (std::is_same<CharT, Latin1Char>::value) \
        dst[j] = CharT(REPLACE_UTF8_LATIN1); \
    else \
        dst[j] = CharT(REPLACE_UTF8); \
} else { \
    /* Counting pass: nothing is written, the invalid unit is only */ \
    /* accounted for by the caller of this macro.                  */ \
    MOZ_ASSERT(Action == CountAndIgnoreInvalids); \
} \
n = n2; \
goto invalidMultiByteCodeUnit; \
} \
@ -324,25 +331,24 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
if ((src[i + m] & 0xC0) != 0x80)
INVALID(ReportInvalidCharacter, i, m);
// Determine the code unit's length in char16_t and act accordingly.
// Determine the code unit's length in CharT and act accordingly.
v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
if (v < 0x10000) {
// The n-byte UTF8 code unit will fit in a single char16_t.
// The n-byte UTF8 code unit will fit in a single CharT.
if (Action == Copy)
dst[j] = char16_t(v);
dst[j] = CharT(v);
} else {
v -= 0x10000;
if (v <= 0xFFFFF) {
// The n-byte UTF8 code unit will fit in two char16_t units.
// The n-byte UTF8 code unit will fit in two CharT units.
if (Action == Copy)
dst[j] = char16_t((v >> 10) + 0xD800);
dst[j] = CharT((v >> 10) + 0xD800);
j++;
if (Action == Copy)
dst[j] = char16_t((v & 0x3FF) + 0xDC00);
dst[j] = CharT((v & 0x3FF) + 0xDC00);
} else {
// The n-byte UTF8 code unit won't fit in two char16_t units.
// The n-byte UTF8 code unit won't fit in two CharT units.
INVALID(ReportTooBigCharacter, v, 1);
}
}
@ -361,61 +367,73 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
return true;
}
template <InflateUTF8Action Action>
static TwoByteCharsZ
template <InflateUTF8Action Action, typename CharsT>
static CharsT
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
{
using CharT = typename CharsT::CharT;
*outlen = 0;
bool isAscii;
if (!InflateUTF8StringToBuffer<Action>(cx, src, /* dst = */ nullptr, outlen, &isAscii))
return TwoByteCharsZ();
if (!InflateUTF8StringToBuffer<Action, CharT>(cx, src, /* dst = */ nullptr, outlen, &isAscii))
return CharsT();
char16_t* dst = cx->pod_malloc<char16_t>(*outlen + 1); // +1 for NUL
CharT* dst = cx->pod_malloc<CharT>(*outlen + 1); // +1 for NUL
if (!dst) {
ReportOutOfMemory(cx);
return TwoByteCharsZ();
return CharsT();
}
if (isAscii) {
size_t srclen = src.length();
MOZ_ASSERT(*outlen == srclen);
for (uint32_t i = 0; i < srclen; i++)
dst[i] = char16_t(src[i]);
dst[i] = CharT(src[i]);
} else {
JS_ALWAYS_TRUE(InflateUTF8StringToBuffer<Copy>(cx, src, dst, outlen, &isAscii));
JS_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, CharT>(cx, src, dst, outlen, &isAscii)));
}
dst[*outlen] = 0; // NUL char
return TwoByteCharsZ(dst, *outlen);
return CharsT(dst, *outlen);
}
TwoByteCharsZ
JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
{
return InflateUTF8StringHelper<CountAndReportInvalids>(cx, utf8, outlen);
return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, utf8, outlen);
}
TwoByteCharsZ
JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
{
UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
return InflateUTF8StringHelper<CountAndReportInvalids>(cx, chars, outlen);
return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, chars, outlen);
}
TwoByteCharsZ
JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
{
return InflateUTF8StringHelper<CountAndIgnoreInvalids>(cx, utf8, outlen);
return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, utf8, outlen);
}
TwoByteCharsZ
JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
{
UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
return InflateUTF8StringHelper<CountAndIgnoreInvalids>(cx, chars, outlen);
return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, chars, outlen);
}
// Strict UTF-8 -> Latin-1 conversion entry point: runs the shared inflate
// helper in CountAndReportInvalids mode, producing a Latin1CharsZ and writing
// the resulting length through |outlen|.
Latin1CharsZ
JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
{
    Latin1CharsZ converted =
        InflateUTF8StringHelper<CountAndReportInvalids, Latin1CharsZ>(cx, utf8, outlen);
    return converted;
}
// Lossy UTF-8 -> Latin-1 conversion entry point: runs the shared inflate
// helper in CountAndIgnoreInvalids mode, producing a Latin1CharsZ and writing
// the resulting length through |outlen|.
Latin1CharsZ
JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
{
    Latin1CharsZ converted =
        InflateUTF8StringHelper<CountAndIgnoreInvalids, Latin1CharsZ>(cx, utf8, outlen);
    return converted;
}
#ifdef DEBUG
@ -424,6 +442,7 @@ JS::ConstUTF8CharsZ::validate(size_t aLength)
{
MOZ_ASSERT(data_);
UTF8Chars chars(data_, aLength);
InflateUTF8StringToBuffer<AssertNoInvalids>(nullptr, chars, nullptr, nullptr, nullptr);
InflateUTF8StringToBuffer<AssertNoInvalids, char16_t>(nullptr, chars, nullptr, nullptr,
nullptr);
}
#endif