зеркало из https://github.com/mozilla/gecko-dev.git
278 строки
7.9 KiB
C++
278 строки
7.9 KiB
C++
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "nsString.h"
|
|
#include "nsIScriptableUConv.h"
|
|
#include "nsScriptableUConv.h"
|
|
#include "nsIStringStream.h"
|
|
#include "nsComponentManagerUtils.h"
|
|
|
|
using namespace mozilla;
|
|
|
|
/* Implementation file */
|
|
// Registers nsIScriptableUnicodeConverter as the interface implemented by
// nsScriptableUnicodeConverter. NS_IMPL_ISUPPORTS is defined outside this
// file; presumably it expands to the class's nsISupports boilerplate
// (AddRef/Release/QueryInterface) -- confirm against nsISupportsImpl.h.
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
|
|
|
|
// Constructs a converter whose "internal" flag starts out cleared.
nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
  : mIsInternal(false)
{}
|
|
|
|
// Nothing to release by hand; members clean up after themselves.
nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
|
|
|
|
// Encodes the UTF-16 string aSrc with the current encoder and stores the
// resulting bytes in _retval. Each unmappable character is replaced by a
// single '?' byte.
//
// Returns NS_ERROR_FAILURE if no encoder is available,
// NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be sized, and NS_OK
// on success.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval)
{
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  // The worst-case length can be computed without accounting for
  // replacements: the replacement is a single byte, and a mappable
  // character always produces at least one byte as well. For ISO-2022-JP,
  // unmappables should not be able to trigger more escape sequences than
  // the mappable worst case, in which every input character escapes into a
  // different state.
  CheckedInt<size_t> capacity =
    mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto pendingInput = MakeSpan(aSrc);
  auto freeOutput = AsWritableBytes(MakeSpan(_retval));
  size_t outputLength = 0;

  while (true) {
    uint32_t status;
    size_t consumed;
    size_t produced;
    // The trailing `false` is presumably the encoder's "last chunk" flag
    // (TODO confirm against the Encoder API); Finish() is the method that
    // passes `true`.
    Tie(status, consumed, produced) =
      mEncoder->EncodeFromUTF16WithoutReplacement(pendingInput,
                                                  freeOutput,
                                                  false);

    // Any status other than "input empty" or "output full" reports an
    // unmappable character; substitute a question mark for it.
    const bool inputDrained = (status == kInputEmpty);
    if (!inputDrained && status != kOutputFull) {
      MOZ_RELEASE_ASSERT(produced < freeOutput.Length(),
        "Unmappables with one-byte replacement should not exceed mappable worst case.");
      freeOutput[produced++] = '?';
    }
    outputLength += produced;

    if (inputDrained) {
      break;
    }

    // Skip past what has been handled and keep going.
    pendingInput = pendingInput.From(consumed);
    freeOutput = freeOutput.From(produced);
  }

  // Shrink the worst-case buffer down to what was actually written.
  MOZ_ASSERT(outputLength <= UINT32_MAX);
  if (!_retval.SetLength(outputLength, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
|
|
|
|
// Flushes any pending encoder output into _retval and resets the
// converters to their initial state so the object can be reused.
//
// Returns NS_ERROR_FAILURE when no charset has been set (no decoder is
// available), NS_OK otherwise. When only a decoder exists, _retval is
// emptied and the decoder is reset.
NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval)
{
  // InitConverter() clears both converters before looking up the encoding,
  // so a missing decoder means no charset is currently configured. Fail the
  // same way the conversion methods do instead of dereferencing a null
  // decoder below.
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there exist extensions that similarly call
  // this at the wrong time, let's deal. In general, it is a design
  // error for this class to handle conversions in both directions.
  if (!mEncoder) {
    _retval.Truncate();
    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    return NS_OK;
  }

  // If we are encoding to ISO-2022-JP, potentially
  // transition back to the ASCII state. The buffer
  // needs to be large enough for an additional NCR,
  // though.
  _retval.SetLength(13);
  Span<char16_t> src(nullptr);
  uint32_t result;
  size_t read;
  size_t written;
  bool hadErrors;
  // Empty input with the final argument set to true: only flushes whatever
  // the encoder still has pending.
  Tie(result, read, written, hadErrors) =
    mEncoder->EncodeFromUTF16(src, _retval, true);
  Unused << hadErrors;
  MOZ_ASSERT(!read);
  MOZ_ASSERT(result == kInputEmpty);
  _retval.SetLength(written);

  // Re-initialize both converters in place for the same encoding.
  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
  return NS_OK;
}
|
|
|
|
// Decodes the bytes in aSrc with the current decoder and stores the UTF-16
// result in _retval.
//
// Returns NS_ERROR_FAILURE if no decoder is available,
// NS_ERROR_OUT_OF_MEMORY if the output buffer cannot be sized,
// NS_ERROR_UDEC_ILLEGALINPUT if the encoding is UTF-8 and decoding without
// replacement does not end with kInputEmpty, and NS_OK otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval)
{
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  uint32_t byteCount = aSrc.Length();

  // Size the output for the worst case up front; it is trimmed at the end.
  CheckedInt<size_t> capacity = mDecoder->MaxUTF16BufferLength(byteCount);
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto input =
    MakeSpan(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), byteCount);
  uint32_t status;
  size_t consumed;
  size_t produced;

  const bool isUtf8 = (mDecoder->Encoding() == UTF_8_ENCODING);
  if (!isUtf8) {
    // Every other encoding decodes with replacement; whether replacements
    // happened is deliberately ignored.
    bool sawErrors;
    Tie(status, consumed, produced, sawErrors) =
      mDecoder->DecodeToUTF16(input, _retval, false);
    Unused << sawErrors;
  } else {
    // The UTF-8 decoder used to throw regardless of the error behavior.
    // Keep simulating that for compatibility with legacy callers; callers
    // that want control over the behavior should switch to TextDecoder.
    Tie(status, consumed, produced) =
      mDecoder->DecodeToUTF16WithoutReplacement(input, _retval, false);
    if (status != kInputEmpty) {
      return NS_ERROR_UDEC_ILLEGALINPUT;
    }
  }

  MOZ_ASSERT(status == kInputEmpty);
  MOZ_ASSERT(consumed == byteCount);
  MOZ_ASSERT(produced <= capacity.value());

  // Trim the buffer to the number of code units actually written.
  if (!_retval.SetLength(produced, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
|
|
|
|
// Encodes the UTF-16 string aString with the current encoder into a newly
// malloc()ed byte buffer. Each unmappable character is replaced by a single
// '?' byte.
//
// On success, *_aData receives the buffer and *aLen the number of bytes
// written; the buffer is handed to the caller (ConvertToInputStream in this
// file releases it with free() when it cannot pass it on).
//
// Returns NS_ERROR_FAILURE if no encoder is available,
// NS_ERROR_OUT_OF_MEMORY if the buffer cannot be sized or allocated, and
// NS_OK on success. The out-parameters are only written on success.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData)
{
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  CheckedInt<size_t> needed =
    mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  // malloc(0) is allowed to return a null pointer, which would be
  // misreported as an out-of-memory failure when the worst-case size is
  // zero (e.g. empty input). Always request at least one byte; the span
  // below still exposes only `capacity` bytes.
  const size_t capacity = needed.value();
  uint8_t* data = static_cast<uint8_t*>(malloc(capacity ? capacity : 1));
  if (!data) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src = MakeSpan(aString);
  auto dst = MakeSpan(data, capacity);
  size_t totalWritten = 0;
  for (;;) {
    uint32_t result;
    size_t read;
    size_t written;
    Tie(result, read, written) =
      mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
    if (result != kInputEmpty && result != kOutputFull) {
      // There's always room for one byte in the case of
      // an unmappable character, because otherwise
      // we'd have gotten `kOutputFull`. Assert it explicitly, as
      // ConvertFromUnicode() does, before writing the replacement.
      MOZ_RELEASE_ASSERT(written < dst.Length(),
        "Unmappables with one-byte replacement should not exceed mappable worst case.");
      dst[written++] = '?';
    }
    totalWritten += written;
    if (result == kInputEmpty) {
      *_aData = data;
      MOZ_ASSERT(totalWritten <= UINT32_MAX);
      *aLen = totalWritten;
      return NS_OK;
    }
    // Skip the input and output that have already been handled.
    src = src.From(read);
    dst = dst.From(written);
  }
}
|
|
|
|
// Encodes aString via ConvertToByteArray() and returns the bytes wrapped in
// a string input stream through _retval.
//
// Any failure from creating the stream, from the conversion, or from
// AdoptData() is returned as-is; in the last case the encoded buffer is
// freed here.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval)
{
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> stream =
    do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  uint8_t* bytes;
  uint32_t byteCount;
  rv = ConvertToByteArray(aString, &byteCount, &bytes);
  if (NS_FAILED(rv)) {
    return rv;
  }

  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
  if (NS_FAILED(rv)) {
    // The stream did not accept the buffer, so release it ourselves.
    free(bytes);
    return rv;
  }

  NS_ADDREF(*_retval = stream);
  return rv;
}
|
|
|
|
// Writes the name of the decoder's encoding into aCharset, or empties
// aCharset when no decoder is set. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset)
{
  if (mDecoder) {
    mDecoder->Encoding()->Name(aCharset);
    return NS_OK;
  }

  aCharset.Truncate();
  return NS_OK;
}
|
|
|
|
// Selects the charset to convert with by delegating to InitConverter() and
// passing its result straight back to the caller.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset)
{
  nsresult rv = InitConverter(aCharset);
  return rv;
}
|
|
|
|
// Copies the stored "internal" flag into *aIsInternal. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal)
{
  const bool current = mIsInternal;
  *aIsInternal = current;
  return NS_OK;
}
|
|
|
|
// Stores aIsInternal as the new "internal" flag. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal)
{
  mIsInternal = aIsInternal;

  return NS_OK;
}
|
|
|
|
// (Re)creates the converters for the encoding labelled aCharset.
//
// Both converters are dropped first, so a failed lookup leaves the object
// without any. No encoder is created for UTF-16LE and UTF-16BE; a decoder
// with BOM removal is created for every recognized encoding.
//
// Returns NS_ERROR_UCONV_NOCONV when the label is not recognized, NS_OK
// otherwise.
nsresult
nsScriptableUnicodeConverter::InitConverter(const nsACString& aCharset)
{
  mEncoder = nullptr;
  mDecoder = nullptr;

  auto resolved = Encoding::ForLabelNoReplacement(aCharset);
  if (!resolved) {
    return NS_ERROR_UCONV_NOCONV;
  }

  const bool isUtf16 =
    resolved == UTF_16LE_ENCODING || resolved == UTF_16BE_ENCODING;
  if (!isUtf16) {
    mEncoder = resolved->NewEncoder();
  }

  mDecoder = resolved->NewDecoderWithBOMRemoval();
  return NS_OK;
}
|