gecko-dev/intl/uconv/nsScriptableUConv.cpp


/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsString.h"
#include "nsIScriptableUConv.h"
#include "nsScriptableUConv.h"
#include "nsIStringStream.h"
#include "nsComponentManagerUtils.h"

using namespace mozilla;

/* Implementation file */
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)

nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
    : mIsInternal(false) {}

nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;

NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval) {
  if (!mEncoder) return NS_ERROR_FAILURE;

  // We can compute the length without replacement, because the
  // the replacement is only one byte long and a mappable character
  // would always output something, i.e. at least one byte.
  // When encoding to ISO-2022-JP, unmappables shouldn't be able
  // to cause more escape sequences to be emitted than the mappable
  // worst case where every input character causes an escape into
  // a different state.
  CheckedInt<size_t> needed =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto dstChars = _retval.GetMutableData(needed.value(), fallible);
  if (!dstChars) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src = Span(aSrc);
  auto dst = AsWritableBytes(*dstChars);
  size_t totalWritten = 0;
  for (;;) {
    uint32_t result;
    size_t read;
    size_t written;
    Tie(result, read, written) =
        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
    if (result != kInputEmpty && result != kOutputFull) {
      MOZ_RELEASE_ASSERT(written < dst.Length(),
                         "Unmappables with one-byte replacement should not "
                         "exceed mappable worst case.");
      dst[written++] = '?';
    }
    totalWritten += written;
    if (result == kInputEmpty) {
      MOZ_ASSERT(totalWritten <= UINT32_MAX);
      if (!_retval.SetLength(totalWritten, fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      return NS_OK;
    }
    src = src.From(read);
    dst = dst.From(written);
  }
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there exists extensions that similarly call
  // this at the wrong time, let's deal. In general, it is a design
  // error for this class to handle conversions in both directions.
  if (!mEncoder) {
    _retval.Truncate();
    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    return NS_OK;
  }
  // If we are encoding to ISO-2022-JP, potentially
  // transition back to the ASCII state. The buffer
  // needs to be large enough for an additional NCR,
  // though.
  _retval.SetLength(13);
  auto dst = AsWritableBytes(_retval.GetMutableData(13));
  Span<char16_t> src(nullptr);
  uint32_t result;
  size_t read;
  size_t written;
  bool hadErrors;
  Tie(result, read, written, hadErrors) =
      mEncoder->EncodeFromUTF16(src, dst, true);
  Unused << hadErrors;
  MOZ_ASSERT(!read);
  MOZ_ASSERT(result == kInputEmpty);
  _retval.SetLength(written);

  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
                                               nsAString& _retval) {
  if (!mDecoder) return NS_ERROR_FAILURE;

  uint32_t length = aSrc.Length();

  CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto dst = _retval.GetMutableData(needed.value(), fallible);
  if (!dst) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto src =
      Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);
  uint32_t result;
  size_t read;
  size_t written;
  bool hadErrors;
  // The UTF-8 decoder used to throw regardless of the error behavior.
  // Simulating the old behavior for compatibility with legacy callers.
  // If callers want control over the behavior, they should switch to
  // TextDecoder.
  if (mDecoder->Encoding() == UTF_8_ENCODING) {
    Tie(result, read, written) =
        mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);
    if (result != kInputEmpty) {
      return NS_ERROR_UDEC_ILLEGALINPUT;
    }
  } else {
    Tie(result, read, written, hadErrors) =
        mDecoder->DecodeToUTF16(src, *dst, false);
  }
  MOZ_ASSERT(result == kInputEmpty);
  MOZ_ASSERT(read == length);
  MOZ_ASSERT(written <= needed.value());
  Unused << hadErrors;
  if (!_retval.SetLength(written, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData) {
  if (!mEncoder) return NS_ERROR_FAILURE;

  CheckedInt<size_t> needed =
      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  uint8_t* data = (uint8_t*)malloc(needed.value());
  if (!data) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  auto src = Span(aString);
  auto dst = Span(data, needed.value());
  size_t totalWritten = 0;
  for (;;) {
    uint32_t result;
    size_t read;
    size_t written;
    Tie(result, read, written) =
        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true);
    if (result != kInputEmpty && result != kOutputFull) {
      // There's always room for one byte in the case of
      // an unmappable character, because otherwise
      // we'd have gotten `kOutputFull`.
      dst[written++] = '?';
    }
    totalWritten += written;
    if (result == kInputEmpty) {
      *_aData = data;
      MOZ_ASSERT(totalWritten <= UINT32_MAX);
      *aLen = totalWritten;
      return NS_OK;
    }
    src = src.From(read);
    dst = dst.From(written);
  }
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval) {
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> inputStream =
      do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) return rv;

  uint8_t* data;
  uint32_t dataLen;
  rv = ConvertToByteArray(aString, &dataLen, &data);
  if (NS_FAILED(rv)) return rv;

  rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen);
  if (NS_FAILED(rv)) {
    free(data);
    return rv;
  }

  NS_ADDREF(*_retval = inputStream);
  return rv;
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
  if (!mDecoder) {
    aCharset.Truncate();
  } else {
    mDecoder->Encoding()->Name(aCharset);
  }
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
  return InitConverter(aCharset);
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
  *aIsInternal = mIsInternal;
  return NS_OK;
}

NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
  mIsInternal = aIsInternal;
  return NS_OK;
}

nsresult nsScriptableUnicodeConverter::InitConverter(
    const nsACString& aCharset) {
  mEncoder = nullptr;
  mDecoder = nullptr;

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }
  if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {
    mEncoder = encoding->NewEncoder();
  }
  mDecoder = encoding->NewDecoderWithBOMRemoval();
  return NS_OK;
}