// gecko-dev/intl/uconv/nsScriptableUConv.cpp
//
// 262 lines
// 8.0 KiB
// C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsString.h"
#include "nsIScriptableUConv.h"
#include "nsScriptableUConv.h"
#include "nsIStringStream.h"
#include "nsComponentManagerUtils.h"
using namespace mozilla;
/* Implementation file */
// Standard XPCOM macro invocation: supplies the nsISupports implementation
// for nsScriptableUnicodeConverter, with nsIScriptableUnicodeConverter as the
// interface it exposes.
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
: mIsInternal(false) {}
// Compiler-generated destructor; there is no custom teardown logic here.
nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
// Encodes the UTF-16 text in aSrc with the current encoder and stores the
// resulting bytes in _retval. Every unmappable character is written as a
// single '?' byte.
//
// Returns NS_ERROR_FAILURE when there is no encoder, NS_ERROR_OUT_OF_MEMORY
// when the output buffer cannot be sized, and NS_OK on success.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval) {
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  // We can compute the length without replacement, because the
  // replacement is only one byte long and a mappable character
  // would always output something, i.e. at least one byte.
  // When encoding to ISO-2022-JP, unmappables shouldn't be able
  // to cause more escape sequences to be emitted than the mappable
  // worst case where every input character causes an escape into
  // a different state.
  CheckedInt<size_t> capacity =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto pendingInput = MakeSpan(aSrc);
  auto freeOutput = AsWritableBytes(MakeSpan(_retval));
  size_t byteCount = 0;

  for (;;) {
    uint32_t status;
    size_t consumed;
    size_t emitted;
    Tie(status, consumed, emitted) =
        mEncoder->EncodeFromUTF16WithoutReplacement(pendingInput, freeOutput,
                                                    false);

    // Anything other than "input empty" / "output full" reports an
    // unmappable character; stand in a '?' for it.
    if (status != kInputEmpty && status != kOutputFull) {
      MOZ_RELEASE_ASSERT(emitted < freeOutput.Length(),
                         "Unmappables with one-byte replacement should not "
                         "exceed mappable worst case.");
      freeOutput[emitted++] = '?';
    }
    byteCount += emitted;

    if (status == kInputEmpty) {
      break;
    }

    // Continue with whatever input and output space is left.
    pendingInput = pendingInput.From(consumed);
    freeOutput = freeOutput.From(emitted);
  }

  // Shrink the over-allocated buffer to the bytes actually written.
  MOZ_ASSERT(byteCount <= UINT32_MAX);
  if (!_retval.SetLength(byteCount, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
// Finishes the current conversion and resets the converter state.
//
// When an encoder exists, _retval receives whatever bytes the encoder still
// emits at end of stream, and both the decoder and the encoder are
// re-created. When there is no encoder, _retval is emptied and only the
// decoder is re-created.
//
// Returns NS_ERROR_FAILURE when neither an encoder nor a decoder exists
// (no charset has been successfully set), NS_OK otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there exist extensions that similarly call
  // this at the wrong time, let's deal. In general, it is a design
  // error for this class to handle conversions in both directions.
  if (!mEncoder) {
    _retval.Truncate();
    // InitConverter() leaves both members null until a valid charset has
    // been set. Without this check the call below would dereference a null
    // decoder.
    if (!mDecoder) {
      return NS_ERROR_FAILURE;
    }
    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    return NS_OK;
  }

  // If we are encoding to ISO-2022-JP, potentially
  // transition back to the ASCII state. The buffer
  // needs to be large enough for an additional NCR,
  // though.
  _retval.SetLength(13);
  Span<char16_t> src(nullptr);
  uint32_t result;
  size_t read;
  size_t written;
  bool hadErrors;
  // Empty input with last == true: only flushes the encoder's pending state.
  Tie(result, read, written, hadErrors) =
      mEncoder->EncodeFromUTF16(src, _retval, true);
  Unused << hadErrors;
  MOZ_ASSERT(!read);
  MOZ_ASSERT(result == kInputEmpty);
  _retval.SetLength(written);

  // Start both directions afresh for the next conversion.
  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
  return NS_OK;
}
// Decodes the bytes in aSrc with the current decoder and stores the UTF-16
// result in _retval.
//
// Returns NS_ERROR_FAILURE when there is no decoder, NS_ERROR_OUT_OF_MEMORY
// when the output buffer cannot be sized, NS_ERROR_UDEC_ILLEGALINPUT when the
// encoding is UTF-8 and decoding without replacement does not consume all of
// the input, and NS_OK on success.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
                                               nsAString& _retval) {
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  uint32_t inputLen = aSrc.Length();
  CheckedInt<size_t> capacity = mDecoder->MaxUTF16BufferLength(inputLen);
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto input = MakeSpan(
      reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), inputLen);
  uint32_t status;
  size_t consumed;
  size_t produced;
  bool sawErrors;

  // The UTF-8 decoder used to throw regardless of the error behavior.
  // Simulating the old behavior for compatibility with legacy callers.
  // If callers want control over the behavior, they should switch to
  // TextDecoder.
  if (mDecoder->Encoding() == UTF_8_ENCODING) {
    Tie(status, consumed, produced) =
        mDecoder->DecodeToUTF16WithoutReplacement(input, _retval, false);
    if (status != kInputEmpty) {
      return NS_ERROR_UDEC_ILLEGALINPUT;
    }
  } else {
    Tie(status, consumed, produced, sawErrors) =
        mDecoder->DecodeToUTF16(input, _retval, false);
  }

  MOZ_ASSERT(status == kInputEmpty);
  MOZ_ASSERT(consumed == inputLen);
  MOZ_ASSERT(produced <= capacity.value());
  Unused << sawErrors;

  // Shrink the over-allocated buffer to the code units actually written.
  if (!_retval.SetLength(produced, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
// Encodes aString with the current encoder into a buffer obtained from
// malloc(). On success the buffer is handed out through *_aData and the
// number of bytes written through *aLen; this function does not free it.
// Every unmappable character is written as a single '?' byte.
//
// Returns NS_ERROR_FAILURE when there is no encoder, NS_ERROR_OUT_OF_MEMORY
// when the buffer size is invalid or the allocation fails, and NS_OK on
// success.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData) {
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  CheckedInt<size_t> capacity =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  uint8_t* buffer = static_cast<uint8_t*>(malloc(capacity.value()));
  if (!buffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto pendingInput = MakeSpan(aString);
  auto freeOutput = MakeSpan(buffer, capacity.value());
  size_t byteCount = 0;

  for (;;) {
    uint32_t status;
    size_t consumed;
    size_t emitted;
    Tie(status, consumed, emitted) =
        mEncoder->EncodeFromUTF16WithoutReplacement(pendingInput, freeOutput,
                                                    true);
    if (status != kInputEmpty && status != kOutputFull) {
      // There's always room for one byte in the case of
      // an unmappable character, because otherwise
      // we'd have gotten `kOutputFull`.
      freeOutput[emitted++] = '?';
    }
    byteCount += emitted;

    if (status == kInputEmpty) {
      break;
    }

    // Continue with whatever input and output space is left.
    pendingInput = pendingInput.From(consumed);
    freeOutput = freeOutput.From(emitted);
  }

  *_aData = buffer;
  MOZ_ASSERT(byteCount <= UINT32_MAX);
  *aLen = byteCount;
  return NS_OK;
}
// Encodes aString via ConvertToByteArray() and wraps the resulting bytes in
// a newly created string input stream, returned through *_retval.
//
// Any failure from creating the stream, encoding, or adopting the data is
// returned as-is; if adoption fails the encoded buffer is freed here.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval) {
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> stream =
      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  uint8_t* bytes;
  uint32_t byteCount;
  rv = ConvertToByteArray(aString, &byteCount, &bytes);
  if (NS_FAILED(rv)) {
    return rv;
  }

  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
  if (NS_FAILED(rv)) {
    // The stream did not take the buffer, so it is still ours to release.
    free(bytes);
    return rv;
  }

  NS_ADDREF(*_retval = stream);
  return rv;
}
// Writes the name of the decoder's encoding into aCharset, or empties
// aCharset when there is no decoder. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
  if (mDecoder) {
    mDecoder->Encoding()->Name(aCharset);
    return NS_OK;
  }

  aCharset.Truncate();
  return NS_OK;
}
// Selects the charset by (re)initializing the converter for the given label;
// the result of InitConverter() is returned unchanged.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
  const nsresult rv = InitConverter(aCharset);
  return rv;
}
// Reports the stored "internal" flag through aIsInternal. Always returns
// NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
  const bool current = mIsInternal;
  *aIsInternal = current;
  return NS_OK;
}
// Stores the given value as the "internal" flag. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
  mIsInternal = aIsInternal;

  return NS_OK;
}
// Drops any existing encoder/decoder and creates new ones for the encoding
// named by the label aCharset.
//
// No encoder is created for UTF-16LE / UTF-16BE; a decoder (with BOM removal)
// is created for every recognized encoding. Returns NS_ERROR_UCONV_NOCONV,
// leaving both members null, when the label is not recognized; NS_OK
// otherwise.
nsresult nsScriptableUnicodeConverter::InitConverter(
    const nsACString& aCharset) {
  // Reset first so that a failed lookup leaves the converter unset.
  mEncoder = nullptr;
  mDecoder = nullptr;

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  const bool isUtf16 =
      encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING;
  if (!isUtf16) {
    mEncoder = encoding->NewEncoder();
  }
  mDecoder = encoding->NewDecoderWithBOMRemoval();
  return NS_OK;
}