зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1289003 - Part 1: Add UTF8CharsToNewLatin1CharsZ, LossyUTF8CharsToNewLatin1CharsZ. r=jwalden
This commit is contained in:
Родитель
382083077f
Коммит
586c7b1a14
|
@ -31,6 +31,8 @@ class Latin1Chars : public mozilla::Range<Latin1Char>
|
|||
typedef mozilla::Range<Latin1Char> Base;
|
||||
|
||||
public:
|
||||
using CharT = Latin1Char;
|
||||
|
||||
Latin1Chars() : Base() {}
|
||||
Latin1Chars(char* aBytes, size_t aLength) : Base(reinterpret_cast<Latin1Char*>(aBytes), aLength) {}
|
||||
Latin1Chars(const Latin1Char* aBytes, size_t aLength)
|
||||
|
@ -49,6 +51,8 @@ class Latin1CharsZ : public mozilla::RangedPtr<Latin1Char>
|
|||
typedef mozilla::RangedPtr<Latin1Char> Base;
|
||||
|
||||
public:
|
||||
using CharT = Latin1Char;
|
||||
|
||||
Latin1CharsZ() : Base(nullptr, 0) {}
|
||||
|
||||
Latin1CharsZ(char* aBytes, size_t aLength)
|
||||
|
@ -73,6 +77,8 @@ class UTF8Chars : public mozilla::Range<unsigned char>
|
|||
typedef mozilla::Range<unsigned char> Base;
|
||||
|
||||
public:
|
||||
using CharT = unsigned char;
|
||||
|
||||
UTF8Chars() : Base() {}
|
||||
UTF8Chars(char* aBytes, size_t aLength)
|
||||
: Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
|
||||
|
@ -90,6 +96,8 @@ class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
|
|||
typedef mozilla::RangedPtr<unsigned char> Base;
|
||||
|
||||
public:
|
||||
using CharT = unsigned char;
|
||||
|
||||
UTF8CharsZ() : Base(nullptr, 0) {}
|
||||
|
||||
UTF8CharsZ(char* aBytes, size_t aLength)
|
||||
|
@ -120,6 +128,8 @@ class ConstUTF8CharsZ
|
|||
const char* data_;
|
||||
|
||||
public:
|
||||
using CharT = unsigned char;
|
||||
|
||||
ConstUTF8CharsZ() : data_(nullptr)
|
||||
{}
|
||||
|
||||
|
@ -157,6 +167,8 @@ class TwoByteChars : public mozilla::Range<char16_t>
|
|||
typedef mozilla::Range<char16_t> Base;
|
||||
|
||||
public:
|
||||
using CharT = char16_t;
|
||||
|
||||
TwoByteChars() : Base() {}
|
||||
TwoByteChars(char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
|
||||
TwoByteChars(const char16_t* aChars, size_t aLength) : Base(const_cast<char16_t*>(aChars), aLength) {}
|
||||
|
@ -170,6 +182,8 @@ class TwoByteCharsZ : public mozilla::RangedPtr<char16_t>
|
|||
typedef mozilla::RangedPtr<char16_t> Base;
|
||||
|
||||
public:
|
||||
using CharT = char16_t;
|
||||
|
||||
TwoByteCharsZ() : Base(nullptr, 0) {}
|
||||
|
||||
TwoByteCharsZ(char16_t* chars, size_t length)
|
||||
|
@ -191,6 +205,8 @@ class ConstTwoByteChars : public mozilla::Range<const char16_t>
|
|||
typedef mozilla::Range<const char16_t> Base;
|
||||
|
||||
public:
|
||||
using CharT = char16_t;
|
||||
|
||||
ConstTwoByteChars() : Base() {}
|
||||
ConstTwoByteChars(const char16_t* aChars, size_t aLength) : Base(aChars, aLength) {}
|
||||
};
|
||||
|
@ -272,6 +288,23 @@ JS_PUBLIC_API(void)
|
|||
DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
|
||||
size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);
|
||||
|
||||
/*
|
||||
* Return a null-terminated Latin-1 string copied from the input string,
|
||||
* storing its length (excluding null terminator) in |*outlen|. Fail and
|
||||
* report an error if the string contains non-Latin-1 codepoints. Returns
|
||||
* Latin1CharsZ() on failure.
|
||||
*/
|
||||
extern Latin1CharsZ
|
||||
UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
|
||||
|
||||
/*
|
||||
* Return a null-terminated Latin-1 string copied from the input string,
|
||||
* storing its length (excluding null terminator) in |*outlen|. Non-Latin-1
|
||||
* codepoints are replaced by '?'. Returns Latin1CharsZ() on failure.
|
||||
*/
|
||||
extern Latin1CharsZ
|
||||
LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
|
||||
|
||||
} // namespace JS
|
||||
|
||||
inline void JS_free(JS::Latin1CharsZ& ptr) { js_free((void*)ptr.get()); }
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include "mozilla/Range.h"
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "jscntxt.h"
|
||||
#include "jsprf.h"
|
||||
|
||||
|
@ -253,19 +255,20 @@ enum InflateUTF8Action {
|
|||
Copy
|
||||
};
|
||||
|
||||
static const uint32_t REPLACE_UTF8 = 0xFFFD;
|
||||
static const char16_t REPLACE_UTF8 = 0xFFFD;
|
||||
static const Latin1Char REPLACE_UTF8_LATIN1 = '?';
|
||||
|
||||
// If making changes to this algorithm, make sure to also update
|
||||
// LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
|
||||
template <InflateUTF8Action Action>
|
||||
template <InflateUTF8Action Action, typename CharT>
|
||||
static bool
|
||||
InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, size_t* dstlenp,
|
||||
InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
|
||||
bool* isAsciip)
|
||||
{
|
||||
if (Action != AssertNoInvalids)
|
||||
*isAsciip = true;
|
||||
|
||||
// Count how many char16_t characters need to be in the inflated string.
|
||||
// Count how many code units need to be in the inflated string.
|
||||
// |i| is the index into |src|, and |j| is the index into |dst|.
|
||||
size_t srclen = src.length();
|
||||
uint32_t j = 0;
|
||||
|
@ -274,7 +277,7 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
|
|||
if (!(v & 0x80)) {
|
||||
// ASCII code unit. Simple copy.
|
||||
if (Action == Copy)
|
||||
dst[j] = char16_t(v);
|
||||
dst[j] = CharT(v);
|
||||
|
||||
} else {
|
||||
// Non-ASCII code unit. Determine its length in bytes (n).
|
||||
|
@ -292,10 +295,14 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
|
|||
} else if (Action == AssertNoInvalids) { \
|
||||
MOZ_CRASH("invalid UTF-8 string: " # report); \
|
||||
} else { \
|
||||
if (Action == Copy) \
|
||||
dst[j] = char16_t(REPLACE_UTF8); \
|
||||
else \
|
||||
if (Action == Copy) { \
|
||||
if (std::is_same<decltype(dst[0]), Latin1Char>::value) \
|
||||
dst[j] = CharT(REPLACE_UTF8_LATIN1); \
|
||||
else \
|
||||
dst[j] = CharT(REPLACE_UTF8); \
|
||||
} else { \
|
||||
MOZ_ASSERT(Action == CountAndIgnoreInvalids); \
|
||||
} \
|
||||
n = n2; \
|
||||
goto invalidMultiByteCodeUnit; \
|
||||
} \
|
||||
|
@ -324,25 +331,24 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
|
|||
if ((src[i + m] & 0xC0) != 0x80)
|
||||
INVALID(ReportInvalidCharacter, i, m);
|
||||
|
||||
// Determine the code unit's length in char16_t and act accordingly.
|
||||
// Determine the code unit's length in CharT and act accordingly.
|
||||
v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
|
||||
if (v < 0x10000) {
|
||||
// The n-byte UTF8 code unit will fit in a single char16_t.
|
||||
// The n-byte UTF8 code unit will fit in a single CharT.
|
||||
if (Action == Copy)
|
||||
dst[j] = char16_t(v);
|
||||
|
||||
dst[j] = CharT(v);
|
||||
} else {
|
||||
v -= 0x10000;
|
||||
if (v <= 0xFFFFF) {
|
||||
// The n-byte UTF8 code unit will fit in two char16_t units.
|
||||
// The n-byte UTF8 code unit will fit in two CharT units.
|
||||
if (Action == Copy)
|
||||
dst[j] = char16_t((v >> 10) + 0xD800);
|
||||
dst[j] = CharT((v >> 10) + 0xD800);
|
||||
j++;
|
||||
if (Action == Copy)
|
||||
dst[j] = char16_t((v & 0x3FF) + 0xDC00);
|
||||
dst[j] = CharT((v & 0x3FF) + 0xDC00);
|
||||
|
||||
} else {
|
||||
// The n-byte UTF8 code unit won't fit in two char16_t units.
|
||||
// The n-byte UTF8 code unit won't fit in two CharT units.
|
||||
INVALID(ReportTooBigCharacter, v, 1);
|
||||
}
|
||||
}
|
||||
|
@ -361,61 +367,73 @@ InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, char16_t* dst, siz
|
|||
return true;
|
||||
}
|
||||
|
||||
template <InflateUTF8Action Action>
|
||||
static TwoByteCharsZ
|
||||
template <InflateUTF8Action Action, typename CharsT>
|
||||
static CharsT
|
||||
InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
|
||||
{
|
||||
using CharT = typename CharsT::CharT;
|
||||
*outlen = 0;
|
||||
|
||||
bool isAscii;
|
||||
if (!InflateUTF8StringToBuffer<Action>(cx, src, /* dst = */ nullptr, outlen, &isAscii))
|
||||
return TwoByteCharsZ();
|
||||
if (!InflateUTF8StringToBuffer<Action, CharT>(cx, src, /* dst = */ nullptr, outlen, &isAscii))
|
||||
return CharsT();
|
||||
|
||||
char16_t* dst = cx->pod_malloc<char16_t>(*outlen + 1); // +1 for NUL
|
||||
CharT* dst = cx->pod_malloc<CharT>(*outlen + 1); // +1 for NUL
|
||||
if (!dst) {
|
||||
ReportOutOfMemory(cx);
|
||||
return TwoByteCharsZ();
|
||||
return CharsT();
|
||||
}
|
||||
|
||||
if (isAscii) {
|
||||
size_t srclen = src.length();
|
||||
MOZ_ASSERT(*outlen == srclen);
|
||||
for (uint32_t i = 0; i < srclen; i++)
|
||||
dst[i] = char16_t(src[i]);
|
||||
|
||||
dst[i] = CharT(src[i]);
|
||||
} else {
|
||||
JS_ALWAYS_TRUE(InflateUTF8StringToBuffer<Copy>(cx, src, dst, outlen, &isAscii));
|
||||
JS_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, CharT>(cx, src, dst, outlen, &isAscii)));
|
||||
}
|
||||
|
||||
dst[*outlen] = 0; // NUL char
|
||||
|
||||
return TwoByteCharsZ(dst, *outlen);
|
||||
return CharsT(dst, *outlen);
|
||||
}
|
||||
|
||||
TwoByteCharsZ
|
||||
JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
|
||||
{
|
||||
return InflateUTF8StringHelper<CountAndReportInvalids>(cx, utf8, outlen);
|
||||
return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, utf8, outlen);
|
||||
}
|
||||
|
||||
TwoByteCharsZ
|
||||
JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
|
||||
{
|
||||
UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
|
||||
return InflateUTF8StringHelper<CountAndReportInvalids>(cx, chars, outlen);
|
||||
return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, chars, outlen);
|
||||
}
|
||||
|
||||
TwoByteCharsZ
|
||||
JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
|
||||
{
|
||||
return InflateUTF8StringHelper<CountAndIgnoreInvalids>(cx, utf8, outlen);
|
||||
return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, utf8, outlen);
|
||||
}
|
||||
|
||||
TwoByteCharsZ
|
||||
JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
|
||||
{
|
||||
UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
|
||||
return InflateUTF8StringHelper<CountAndIgnoreInvalids>(cx, chars, outlen);
|
||||
return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, chars, outlen);
|
||||
}
|
||||
|
||||
// Strict UTF-8 -> Latin-1 entry point: forwards to InflateUTF8StringHelper
// with the CountAndReportInvalids action and Latin1CharsZ as the result type.
// The result (pointer plus |*outlen|) is whatever the helper produces.
// NOTE(review): in the visible parts of InflateUTF8StringToBuffer, decoded
// values are stored as CharT(v) with no range check, so code points above
// the Latin-1 range look like they are narrowed rather than reported --
// confirm against the full function.
Latin1CharsZ
|
||||
JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
|
||||
{
|
||||
return InflateUTF8StringHelper<CountAndReportInvalids, Latin1CharsZ>(cx, utf8, outlen);
|
||||
}
|
||||
|
||||
// Lossy UTF-8 -> Latin-1 entry point: forwards to InflateUTF8StringHelper
// with the CountAndIgnoreInvalids action and Latin1CharsZ as the result type.
// NOTE(review): the replacement character is chosen in the INVALID macro via
// std::is_same<decltype(dst[0]), Latin1Char>. decltype of the lvalue
// expression dst[0] is a reference type (CharT&), so that test appears to be
// always false and REPLACE_UTF8 narrowed to CharT would be written instead of
// REPLACE_UTF8_LATIN1 ('?') -- confirm; comparing CharT itself would avoid it.
Latin1CharsZ
|
||||
JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
|
||||
{
|
||||
return InflateUTF8StringHelper<CountAndIgnoreInvalids, Latin1CharsZ>(cx, utf8, outlen);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
|
@ -424,6 +442,7 @@ JS::ConstUTF8CharsZ::validate(size_t aLength)
|
|||
{
|
||||
MOZ_ASSERT(data_);
|
||||
UTF8Chars chars(data_, aLength);
|
||||
InflateUTF8StringToBuffer<AssertNoInvalids>(nullptr, chars, nullptr, nullptr, nullptr);
|
||||
InflateUTF8StringToBuffer<AssertNoInvalids, char16_t>(nullptr, chars, nullptr, nullptr,
|
||||
nullptr);
|
||||
}
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче