2001-12-01 01:48:47 +03:00
|
|
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2001-12-01 01:48:47 +03:00
|
|
|
|
|
|
|
#include "nsConverterInputStream.h"
|
2005-06-24 23:44:50 +04:00
|
|
|
#include "nsIInputStream.h"
|
2012-12-05 03:04:39 +04:00
|
|
|
#include "nsReadLine.h"
|
2013-08-20 15:03:50 +04:00
|
|
|
#include "nsStreamUtils.h"
|
2013-01-15 16:22:03 +04:00
|
|
|
#include <algorithm>
|
2017-04-27 13:27:03 +03:00
|
|
|
#include "mozilla/Unused.h"
|
2014-05-08 13:32:00 +04:00
|
|
|
|
2017-04-27 13:27:03 +03:00
|
|
|
using namespace mozilla;
|
2001-12-01 01:48:47 +03:00
|
|
|
|
|
|
|
// Default size used by Init() when the caller passes aBufferSize <= 0: it is
// applied both as the byte buffer capacity and as the UTF-16 output buffer
// length.
#define CONVERTER_BUFFER_SIZE 8192
|
|
|
|
|
2014-04-27 11:06:00 +04:00
|
|
|
// XPCOM interface map: nsConverterInputStream is exposed through
// nsIConverterInputStream, nsIUnicharInputStream and
// nsIUnicharLineInputStream.
NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
                  nsIUnicharInputStream, nsIUnicharLineInputStream)
|
2013-10-18 03:09:20 +04:00
|
|
|
|
2001-12-01 01:48:47 +03:00
|
|
|
// Sets up the decoder and both buffers for reading UTF-16 out of aStream.
//
// aStream          - source of the undecoded bytes.
// aCharset         - encoding label; a null pointer selects "UTF-8".
// aBufferSize      - requested byte buffer capacity; values <= 0 select
//                    CONVERTER_BUFFER_SIZE for both buffers.
// aReplacementChar - zero makes decoding errors fatal; only its zero-ness is
//                    inspected here.
//
// Returns NS_OK on success, NS_ERROR_UCONV_NOCONV when
// Encoding::ForLabelNoReplacement() yields no encoding for the label, and
// NS_ERROR_OUT_OF_MEMORY when the output size cannot be computed or either
// buffer cannot be allocated. mInput is only assigned on success.
NS_IMETHODIMP
nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
                             int32_t aBufferSize, char16_t aReplacementChar) {
  nsAutoCString label;
  if (aCharset) {
    label = aCharset;
  } else {
    label.AssignLiteral("UTF-8");
  }

  auto encoding = Encoding::ForLabelNoReplacement(label);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  // Previously, the implementation auto-switched only
  // between the two UTF-16 variants and only when
  // initialized with an endianness-unspecific label.
  mConverter = encoding->NewDecoder();

  // Start from the defaults and only ask the decoder for a size when the
  // caller supplied a positive buffer size.
  size_t outputBufferSize = CONVERTER_BUFFER_SIZE;
  if (aBufferSize > 0) {
    // NetUtil.jsm assumes that if buffer size equals the input size, the
    // whole stream will be processed as one readString. This is not true
    // with encoding_rs, because encoding_rs might want to see space for a
    // surrogate pair, so let's compute a larger output buffer length.
    CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
    if (!needed.isValid()) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    outputBufferSize = needed.value();
  } else {
    aBufferSize = CONVERTER_BUFFER_SIZE;
  }

  // Set up our buffers: the byte buffer only reserves capacity, while the
  // UTF-16 buffer is given its full length up front.
  if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  mInput = aStream;
  mErrorsAreFatal = (aReplacementChar == 0);
  return NS_OK;
}
|
|
|
|
|
|
|
|
// Closes the underlying stream (if one is still attached) and drops the line
// buffer, the stream reference, the decoder and both data buffers.
//
// Returns whatever the underlying stream's Close() returned, or NS_OK when
// there was no underlying stream.
NS_IMETHODIMP
nsConverterInputStream::Close() {
  nsresult rv = NS_OK;
  if (mInput) {
    rv = mInput->Close();
  }

  // Release everything regardless of how the underlying Close() went.
  mLineBuffer = nullptr;
  mInput = nullptr;
  mConverter = nullptr;
  mByteData.Clear();
  mUnicharData.Clear();

  return rv;
}
|
|
|
|
|
|
|
|
// Copies up to aCount decoded UTF-16 code units into aBuf.
//
// Already-decoded data is served first; Fill() is only called when the
// internal buffer is exhausted. *aReadCount receives the number of code units
// copied. When Fill() produces nothing, *aReadCount is 0 and the status that
// Fill() stored in mLastErrorCode is returned; otherwise NS_OK is returned.
NS_IMETHODIMP
nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
                             uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
  if (available == 0) {
    // Nothing buffered: refill the unichar buffer.
    available = Fill(&mLastErrorCode);
    if (available == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  // Never hand out more than the caller asked for.
  const uint32_t toCopy = std::min(available, aCount);
  memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
         toCopy * sizeof(char16_t));

  *aReadCount = toCopy;
  mUnicharDataOffset += toCopy;
  return NS_OK;
}
|
|
|
|
|
2003-03-06 22:54:51 +03:00
|
|
|
// Feeds up to aCount decoded UTF-16 code units to aWriter.
//
// Already-decoded data is served first; Fill() is only called when the
// internal buffer is exhausted. aWriter is invoked repeatedly until the
// requested amount has been consumed or it reports a failure.
//
// aWriter    - callback receiving (this, aClosure, segment pointer, offset of
//              the segment within this call, units offered, out: units taken).
// aClosure   - opaque pointer passed through to aWriter.
// aCount     - maximum number of UTF-16 code units to hand out.
// aReadCount - receives the number of code units aWriter consumed; it is
//              always written, including on every error return.
//
// Returns the status reported by Fill() when no data could be produced or
// Fill() failed; otherwise NS_OK. Failures reported by aWriter end the loop
// but are deliberately not propagated to the caller.
NS_IMETHODIMP
nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
                                     void* aClosure, uint32_t aCount,
                                     uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  // Define the out-param up front so that no early return can leave it
  // uninitialized for the caller.
  *aReadCount = 0;

  // Despite the historical "bytes" naming, all counts here are in UTF-16
  // code units.
  uint32_t unitsToWrite = mUnicharDataLength - mUnicharDataOffset;
  if (0 == unitsToWrite) {
    // Fill the unichar buffer
    nsresult fillStatus = NS_OK;
    unitsToWrite = Fill(&fillStatus);
    if (unitsToWrite == 0 || NS_FAILED(fillStatus)) {
      return fillStatus;
    }
  }

  if (unitsToWrite > aCount) {
    unitsToWrite = aCount;
  }

  uint32_t totalUnitsWritten = 0;
  while (unitsToWrite) {
    uint32_t unitsWritten = 0;
    nsresult rv =
        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
                totalUnitsWritten, unitsToWrite, &unitsWritten);
    if (NS_FAILED(rv)) {
      // don't propagate errors to the caller
      break;
    }

    unitsToWrite -= unitsWritten;
    totalUnitsWritten += unitsWritten;
    mUnicharDataOffset += unitsWritten;
  }

  *aReadCount = totalUnitsWritten;
  return NS_OK;
}
|
|
|
|
|
2005-06-24 23:44:50 +04:00
|
|
|
// Assigns up to aCount decoded UTF-16 code units to aString.
//
// Already-decoded data is served first; Fill() is only called when the
// internal buffer is exhausted. *aReadCount receives the number of code units
// placed in aString. When Fill() produces nothing, *aReadCount is 0, aString
// is left untouched and the status that Fill() stored in mLastErrorCode is
// returned; otherwise NS_OK is returned.
NS_IMETHODIMP
nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
                                   uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
  if (available == 0) {
    // Nothing buffered: refill the unichar buffer.
    available = Fill(&mLastErrorCode);
    if (available == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  // Never hand out more than the caller asked for.
  const uint32_t toAssign = (available > aCount) ? aCount : available;
  aString.Assign(mUnicharData.Elements() + mUnicharDataOffset, toAssign);

  *aReadCount = toAssign;
  mUnicharDataOffset += toAssign;
  return NS_OK;
}
|
|
|
|
|
2001-12-01 01:48:47 +03:00
|
|
|
// Reads more bytes from mInput and decodes them into mUnicharData.
//
// aErrorCode - receives the status of this refill: NS_BASE_STREAM_CLOSED when
//              there is no input stream, the previous failure when
//              mLastErrorCode already holds one, NS_ERROR_UDEC_ILLEGALINPUT
//              when the decoder reports anything other than kInputEmpty or
//              kOutputFull, and NS_OK otherwise (including end of data).
//
// Returns the number of UTF-16 code units now available in mUnicharData
// (also stored in mUnicharDataLength); 0 when nothing was produced.
uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
  if (!mInput) {
    // We already closed the stream!
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return 0;
  }

  if (NS_FAILED(mLastErrorCode)) {
    // We failed to completely convert last time, and error-recovery
    // is disabled. We will fare no better this time, so...
    *aErrorCode = mLastErrorCode;
    return 0;
  }

  // Bytes that the previous conversion did not consume are tracked in
  // mLeftOverBytes and handed to NS_FillArray together with the buffer, so
  // the byte buffer ends up holding leftover + newly read bytes (asserted
  // below).
  uint32_t bytesRead;
  *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &bytesRead);
  if (bytesRead == 0 && mLeftOverBytes == 0) {
    // No more data
    *aErrorCode = NS_OK;
    return 0;
  }

  NS_ASSERTION(bytesRead + mLeftOverBytes == mByteData.Length(),
               "mByteData is lying to us somewhere");

  // Now convert as much of the byte buffer to unicode as possible.
  auto input = AsBytes(Span(mByteData));
  auto output = Span(mUnicharData);

  // mUnicharData.Length() is the buffer length, not the fill status;
  // mUnicharDataLength reflects the current fill status. Whenever we convert,
  // mUnicharData is logically empty, so both cursors restart at zero.
  mUnicharDataLength = 0;
  mUnicharDataOffset = 0;

  // Truncation from size_t to uint32_t below is OK, because the sizes
  // are bounded by the lengths of mByteData and mUnicharData.
  uint32_t status;
  size_t consumed;
  size_t produced;
  bool hadErrors;

  // The design of this class is fundamentally bogus in that trailing
  // errors are ignored. Always passing false as the last argument to
  // Decode* calls below.
  if (!mErrorsAreFatal) {
    Tie(status, consumed, produced, hadErrors) =
        mConverter->DecodeToUTF16(input, output, false);
  } else {
    Tie(status, consumed, produced) =
        mConverter->DecodeToUTF16WithoutReplacement(input, output, false);
  }
  Unused << hadErrors;

  mLeftOverBytes = mByteData.Length() - consumed;
  mUnicharDataLength = produced;

  const bool stoppedNormally =
      (status == kInputEmpty || status == kOutputFull);
  if (!stoppedNormally) {
    MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
    *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
  } else {
    *aErrorCode = NS_OK;
  }

  return mUnicharDataLength;
}
|
2005-06-24 23:44:50 +04:00
|
|
|
|
|
|
|
// Reads one line into aLine by delegating to NS_ReadLine(), which also sets
// *aResult and supplies the return value.
//
// The line buffer is created on first use and reused by later calls until
// Close() discards it.
NS_IMETHODIMP
nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
  if (!mLineBuffer) {
    mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
  }

  nsLineBuffer<char16_t>* lineBuffer = mLineBuffer.get();
  return NS_ReadLine(this, lineBuffer, aLine, aResult);
}
|