2001-09-29 00:14:13 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
1998-04-14 00:24:54 +04:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* The contents of this file are subject to the Netscape Public License
|
|
|
|
* Version 1.1 (the "License"); you may not use this file except in
|
|
|
|
* compliance with the License. You may obtain a copy of the License at
|
|
|
|
* http://www.mozilla.org/NPL/
|
1998-04-14 00:24:54 +04:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
|
|
* for the specific language governing rights and limitations under the
|
|
|
|
* License.
|
1998-04-14 00:24:54 +04:00
|
|
|
*
|
1999-11-06 06:43:54 +03:00
|
|
|
* The Original Code is mozilla.org code.
|
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* The Initial Developer of the Original Code is
|
|
|
|
* Netscape Communications Corporation.
|
|
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
|
|
* the Initial Developer. All Rights Reserved.
|
1999-11-06 06:43:54 +03:00
|
|
|
*
|
2001-09-29 00:14:13 +04:00
|
|
|
* Contributor(s):
|
|
|
|
*
|
|
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
|
|
* use your version of this file under the terms of the NPL, indicate your
|
|
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
|
|
* the provisions above, a recipient may use your version of this file under
|
|
|
|
* the terms of any one of the NPL, the GPL or the LGPL.
|
|
|
|
*
|
|
|
|
* ***** END LICENSE BLOCK ***** */
|
1999-02-03 21:55:10 +03:00
|
|
|
|
2000-05-15 02:46:24 +04:00
|
|
|
|
1998-04-14 00:24:54 +04:00
|
|
|
#include "nsIUnicharInputStream.h"
|
|
|
|
#include "nsIByteBuffer.h"
|
|
|
|
#include "nsIUnicharBuffer.h"
|
1999-02-03 21:55:10 +03:00
|
|
|
#include "nsIServiceManager.h"
|
1998-04-14 00:24:54 +04:00
|
|
|
#include "nsString.h"
|
|
|
|
#include "nsCRT.h"
|
|
|
|
#include <fcntl.h>
|
2000-05-03 02:38:04 +04:00
|
|
|
#if defined(NS_WIN32) || defined(XP_OS2_VACPP)
|
1998-04-14 00:24:54 +04:00
|
|
|
#include <io.h>
|
|
|
|
#else
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
class StringUnicharInputStream : public nsIUnicharInputStream {
|
|
|
|
public:
|
|
|
|
StringUnicharInputStream(nsString* aString);
|
1999-04-25 08:57:38 +04:00
|
|
|
virtual ~StringUnicharInputStream();
|
1998-04-14 00:24:54 +04:00
|
|
|
|
|
|
|
NS_DECL_ISUPPORTS
|
|
|
|
|
1998-07-24 00:34:01 +04:00
|
|
|
NS_IMETHOD Read(PRUnichar* aBuf,
|
1998-12-16 08:40:20 +03:00
|
|
|
PRUint32 aOffset,
|
|
|
|
PRUint32 aCount,
|
|
|
|
PRUint32 *aReadCount);
|
1998-07-24 00:34:01 +04:00
|
|
|
NS_IMETHOD Close();
|
1998-04-14 00:24:54 +04:00
|
|
|
|
|
|
|
nsString* mString;
|
1998-12-16 08:40:20 +03:00
|
|
|
PRUint32 mPos;
|
|
|
|
PRUint32 mLen;
|
1998-04-14 00:24:54 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
// Wraps aString and starts reading at its first character.
// aString is dereferenced immediately to capture its length, so it must not
// be null.
StringUnicharInputStream::StringUnicharInputStream(nsString* aString)
  : mString(aString),
    mPos(0),
    mLen(aString->Length())
{
  NS_INIT_ISUPPORTS();
}
|
|
|
|
|
|
|
|
// Frees the wrapped string if Close() has not already done so.
StringUnicharInputStream::~StringUnicharInputStream()
{
  // Deleting a null pointer is a no-op, so no explicit null test is needed.
  delete mString;
}
|
|
|
|
|
1998-07-24 00:34:01 +04:00
|
|
|
/**
 * Copies up to aCount characters from the wrapped string into aBuf,
 * starting at index aOffset of aBuf, and advances the read position.
 *
 * @param aBuf        destination buffer; must have room for
 *                    aOffset + aCount characters
 * @param aOffset     index within aBuf at which the first character is stored
 * @param aCount      maximum number of characters to copy
 * @param aReadCount  receives the number of characters actually copied
 * @return NS_OK when at least the (possibly empty) request was served from
 *         remaining data; (nsresult)-1 with *aReadCount == 0 once the string
 *         is exhausted or the stream has been closed.
 */
nsresult StringUnicharInputStream::Read(PRUnichar* aBuf,
                                        PRUint32 aOffset,
                                        PRUint32 aCount,
                                        PRUint32 *aReadCount)
{
  // Close() sets mPos = mLen, so this also covers a closed stream and keeps
  // us from touching the deleted mString below.
  if (mPos >= mLen) {
    *aReadCount = 0;
    return (nsresult)-1;
  }

  const PRUnichar* us = mString->get();
  NS_ASSERTION(mLen >= mPos, "unsigned madness");
  PRUint32 amount = mLen - mPos;
  if (amount > aCount) {
    amount = aCount;
  }

  // Honor aOffset, as UTF8InputStream::Read does; previously the offset was
  // ignored and the data always landed at the start of aBuf.
  memcpy(aBuf + aOffset, us + mPos, sizeof(PRUnichar) * amount);
  mPos += amount;
  *aReadCount = amount;
  return NS_OK;
}
|
|
|
|
|
1998-07-24 00:34:01 +04:00
|
|
|
// Marks the stream as exhausted and releases the wrapped string.
// Always returns NS_OK; calling it more than once is harmless.
nsresult StringUnicharInputStream::Close()
{
  // With mPos == mLen every later Read() takes its end-of-data path and
  // never dereferences mString.
  mPos = mLen;

  delete mString;  // no-op when already null
  mString = 0;
  return NS_OK;
}
|
|
|
|
|
1999-08-23 14:14:16 +04:00
|
|
|
// XPCOM boilerplate: supplies the nsISupports methods declared via
// NS_DECL_ISUPPORTS in StringUnicharInputStream, with nsIUnicharInputStream
// as the one interface it can be queried for.
NS_IMPL_ISUPPORTS1(StringUnicharInputStream, nsIUnicharInputStream)
|
1998-04-14 00:24:54 +04:00
|
|
|
|
1999-05-26 05:38:36 +04:00
|
|
|
/**
 * Creates a StringUnicharInputStream over aString and returns it through
 * aInstancePtrResult as an nsIUnicharInputStream.
 *
 * The new stream stores aString and deletes it when it is closed or
 * destroyed.
 *
 * @return NS_ERROR_NULL_POINTER if either argument is null,
 *         NS_ERROR_OUT_OF_MEMORY if the stream could not be allocated,
 *         otherwise the result of QueryInterface on the new stream.
 */
NS_COM nsresult
NS_NewStringUnicharInputStream(nsIUnicharInputStream** aInstancePtrResult,
                               nsString* aString)
{
  NS_PRECONDITION(nsnull != aString, "null ptr");
  NS_PRECONDITION(nsnull != aInstancePtrResult, "null ptr");
  if (!aString || !aInstancePtrResult) {
    return NS_ERROR_NULL_POINTER;
  }

  StringUnicharInputStream* stream = new StringUnicharInputStream(aString);
  if (!stream) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  void** result = (void**) aInstancePtrResult;
  return stream->QueryInterface(NS_GET_IID(nsIUnicharInputStream), result);
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
class UTF8InputStream : public nsIUnicharInputStream {
|
1998-04-14 00:24:54 +04:00
|
|
|
public:
|
2001-12-04 04:10:43 +03:00
|
|
|
UTF8InputStream(nsIInputStream* aStream,
|
|
|
|
PRUint32 aBufSize);
|
|
|
|
virtual ~UTF8InputStream();
|
1998-04-14 00:24:54 +04:00
|
|
|
|
|
|
|
NS_DECL_ISUPPORTS
|
1998-07-24 00:34:01 +04:00
|
|
|
NS_IMETHOD Read(PRUnichar* aBuf,
|
1998-12-16 08:40:20 +03:00
|
|
|
PRUint32 aOffset,
|
|
|
|
PRUint32 aCount,
|
|
|
|
PRUint32 *aReadCount);
|
1998-07-24 00:34:01 +04:00
|
|
|
NS_IMETHOD Close();
|
1998-04-14 00:24:54 +04:00
|
|
|
|
|
|
|
protected:
|
1998-07-24 00:34:01 +04:00
|
|
|
PRInt32 Fill(nsresult * aErrorCode);
|
1998-04-14 00:24:54 +04:00
|
|
|
|
2002-01-25 02:53:53 +03:00
|
|
|
static void CountValidUTF8Bytes(const char *aBuf, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2bytes);
|
2001-12-04 04:10:43 +03:00
|
|
|
|
|
|
|
nsCOMPtr<nsIInputStream> mInput;
|
|
|
|
nsCOMPtr<nsIByteBuffer> mByteData;
|
|
|
|
nsCOMPtr<nsIUnicharBuffer> mUnicharData;
|
|
|
|
|
1998-12-16 08:40:20 +03:00
|
|
|
PRUint32 mByteDataOffset;
|
|
|
|
PRUint32 mUnicharDataOffset;
|
|
|
|
PRUint32 mUnicharDataLength;
|
1998-04-14 00:24:54 +04:00
|
|
|
};
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
// Wraps aStream and allocates the byte and unichar staging buffers.
// A requested size of 0 selects the default of 8192.
UTF8InputStream::UTF8InputStream(nsIInputStream* aStream,
                                 PRUint32 aBufferSize)
  : mInput(aStream),
    mByteDataOffset(0),
    mUnicharDataOffset(0),
    mUnicharDataLength(0)
{
  NS_INIT_ISUPPORTS();

  const PRUint32 bufferSize = (aBufferSize == 0) ? 8192 : aBufferSize;

  // XXX what if these fail?
  NS_NewByteBuffer(getter_AddRefs(mByteData), nsnull, bufferSize);
  NS_NewUnicharBuffer(getter_AddRefs(mUnicharData), nsnull, bufferSize);
}
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
// XPCOM boilerplate: supplies the nsISupports methods declared via
// NS_DECL_ISUPPORTS in UTF8InputStream, with nsIUnicharInputStream as the
// one interface it can be queried for.
NS_IMPL_ISUPPORTS1(UTF8InputStream,nsIUnicharInputStream)
|
1998-04-14 00:24:54 +04:00
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
// Tears the stream down by closing it, which drops the references to the
// input stream and both staging buffers.
UTF8InputStream::~UTF8InputStream()
{
  // Close() only ever reports NS_OK, so its result is deliberately unused.
  (void) Close();
}
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
// Drops the references to the wrapped input stream and to both staging
// buffers. Always returns NS_OK.
nsresult UTF8InputStream::Close()
{
  // A null mInput is what Fill() checks to recognize a closed stream.
  mInput = nsnull;

  mByteData = nsnull;
  mUnicharData = nsnull;

  return NS_OK;
}
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
/**
 * Copies up to aCount converted characters into aBuf, starting at index
 * aOffset of aBuf. When no converted characters are pending, Fill() is
 * called once to fetch and convert more input.
 *
 * @param aBuf        destination buffer; must have room for
 *                    aOffset + aCount characters
 * @param aOffset     index within aBuf at which the first character is stored
 * @param aCount      maximum number of characters to copy
 * @param aReadCount  receives the number of characters actually copied
 * @return NS_OK when characters were copied; otherwise the error code
 *         reported by Fill(), with *aReadCount set to 0.
 */
nsresult UTF8InputStream::Read(PRUnichar* aBuf,
                               PRUint32 aOffset,
                               PRUint32 aCount,
                               PRUint32 *aReadCount)
{
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  PRUint32 available = mUnicharDataLength - mUnicharDataOffset;

  if (0 == available) {
    // Fill the unichar buffer.
    //
    // Fill() returns a signed count and uses negative values for failure
    // (e.g. -1 once the stream is closed). The result must stay signed for
    // the test below: stored in an unsigned variable, -1 would look like a
    // huge character count and we would copy from a buffer that may no
    // longer exist.
    nsresult errorCode = NS_OK;
    PRInt32 filled = Fill(&errorCode);
    if (filled <= 0) {
      *aReadCount = 0;
      return errorCode;
    }
    available = PRUint32(filled);
  }

  if (available > aCount) {
    available = aCount;
  }

  memcpy(aBuf + aOffset, mUnicharData->GetBuffer() + mUnicharDataOffset,
         available * sizeof(PRUnichar));
  mUnicharDataOffset += available;
  *aReadCount = available;
  return NS_OK;
}
|
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
/**
 * Refills the unichar buffer: pulls more bytes from mInput into mByteData,
 * converts the leading run of complete UTF-8 sequences into mUnicharData and
 * resets the read offsets.
 *
 * @param aErrorCode  receives NS_BASE_STREAM_CLOSED when the stream has been
 *                    closed; otherwise it is passed on to mByteData->Fill()
 * @return the number of characters now held in mUnicharData; -1 when the
 *         stream is closed; the non-positive result of mByteData->Fill()
 *         when no bytes could be read; 0 when the converted text would not
 *         fit into mUnicharData.
 */
PRInt32 UTF8InputStream::Fill(nsresult * aErrorCode)
{
  // We already closed the stream!
  if (nsnull == mInput) {
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return -1;
  }

  // Bytes left unconverted by the previous pass (presumably a trailing
  // partial sequence) are handed back to the byte buffer as the amount to
  // keep while it reads more input.
  NS_ASSERTION(mByteData->GetLength() >= mByteDataOffset, "unsigned madness");
  PRUint32 remainder = mByteData->GetLength() - mByteDataOffset;
  mByteDataOffset = remainder;

  PRInt32 nb = mByteData->Fill(aErrorCode, mInput, remainder);
  if (nb <= 0) {
    // Because we assume a many to one conversion, the lingering data
    // in the byte buffer must be a partial conversion
    // fragment. Because we know that we have received no more new
    // data to add to it, we can't convert it. Therefore, we discard
    // it.
    return nb;
  }
  NS_ASSERTION(remainder + nb == mByteData->GetLength(), "bad nb");

  // Now convert as much of the byte buffer to unicode as possible.
  // First measure: srcLen = bytes that form whole sequences,
  //                dstLen = characters they will produce.
  PRUint32 srcLen, dstLen;
  CountValidUTF8Bytes(mByteData->GetBuffer(),remainder + nb, srcLen, dstLen);

  // the number of UCS2 characters should always be <= the number of
  // UTF8 chars
  NS_ASSERTION( (remainder+nb >= srcLen), "cannot be longer than out buffer");
  NS_ASSERTION(PRInt32(dstLen) <= mUnicharData->GetBufferSize(),
               "Ouch. I would overflow my buffer if I wasn't so careful.");
  if (PRInt32(dstLen) > mUnicharData->GetBufferSize()) {
    return 0;
  }

  // Then convert [first, last) straight into the unichar buffer.
  ConvertUTF8toUCS2 converter(mUnicharData->GetBuffer());

  nsASingleFragmentCString::const_char_iterator first = mByteData->GetBuffer();
  nsASingleFragmentCString::const_char_iterator last = mByteData->GetBuffer() + srcLen;

  copy_string(first, last, converter);
  NS_ASSERTION(converter.Length() == dstLen, "length mismatch");

  // Publish the freshly converted run and remember how many bytes it used.
  mUnicharDataOffset = 0;
  mUnicharDataLength = dstLen;
  mByteDataOffset = srcLen;

  return dstLen;
}
|
|
|
|
|
2002-01-25 02:53:53 +03:00
|
|
|
void
|
|
|
|
UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer, PRUint32 aMaxBytes, PRUint32& aValidUTF8bytes, PRUint32& aValidUCS2chars)
|
1998-04-14 00:24:54 +04:00
|
|
|
{
|
2001-12-04 04:10:43 +03:00
|
|
|
const char *c = aBuffer;
|
2001-12-05 06:49:13 +03:00
|
|
|
const char *end = aBuffer + aMaxBytes;
|
2002-01-12 05:38:08 +03:00
|
|
|
const char *lastchar = c; // pre-initialize in case of 0-length buffer
|
2002-01-25 02:53:53 +03:00
|
|
|
PRUint32 ucs2bytes = 0;
|
2001-12-05 06:49:13 +03:00
|
|
|
while (c < end && *c) {
|
2002-01-12 05:38:08 +03:00
|
|
|
lastchar = c;
|
2002-01-25 02:53:53 +03:00
|
|
|
ucs2bytes++;
|
|
|
|
|
2001-12-05 06:49:13 +03:00
|
|
|
if (UTF8traits::isASCII(*c))
|
2001-12-04 04:10:43 +03:00
|
|
|
c++;
|
2001-12-05 06:49:13 +03:00
|
|
|
else if (UTF8traits::is2byte(*c))
|
2001-12-04 04:10:43 +03:00
|
|
|
c += 2;
|
2001-12-05 06:49:13 +03:00
|
|
|
else if (UTF8traits::is3byte(*c))
|
2001-12-04 04:10:43 +03:00
|
|
|
c += 3;
|
2001-12-05 06:49:13 +03:00
|
|
|
else if (UTF8traits::is4byte(*c))
|
2001-12-04 04:10:43 +03:00
|
|
|
c += 4;
|
2001-12-05 06:49:13 +03:00
|
|
|
else if (UTF8traits::is5byte(*c))
|
2001-12-04 04:10:43 +03:00
|
|
|
c += 5;
|
2001-12-05 06:49:13 +03:00
|
|
|
else if (UTF8traits::is6byte(*c))
|
|
|
|
c += 6;
|
|
|
|
else {
|
2001-12-04 04:10:43 +03:00
|
|
|
NS_WARNING("Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
|
2001-12-05 06:49:13 +03:00
|
|
|
break; // Otherwise we go into an infinite loop. But what happens now?
|
|
|
|
}
|
1998-04-14 00:24:54 +04:00
|
|
|
}
|
2002-01-25 02:53:53 +03:00
|
|
|
if (c > end) {
|
2002-01-12 05:38:08 +03:00
|
|
|
c = lastchar;
|
2002-01-25 02:53:53 +03:00
|
|
|
ucs2bytes--;
|
|
|
|
}
|
1998-04-14 00:24:54 +04:00
|
|
|
|
2002-01-25 02:53:53 +03:00
|
|
|
aValidUTF8bytes = c - aBuffer;
|
|
|
|
aValidUCS2chars = ucs2bytes;
|
2001-12-04 04:10:43 +03:00
|
|
|
}
|
1998-04-14 00:24:54 +04:00
|
|
|
|
2001-12-04 04:10:43 +03:00
|
|
|
/**
 * Creates a UTF8InputStream around aStreamToWrap and returns it through
 * aInstancePtrResult as an nsIUnicharInputStream.
 *
 * @param aBufferSize  forwarded to the UTF8InputStream constructor, where 0
 *                     selects that class's default buffer size
 * @return NS_ERROR_OUT_OF_MEMORY if the stream could not be allocated,
 *         otherwise the result of QueryInterface on the new stream.
 */
NS_COM nsresult
NS_NewUTF8ConverterStream(nsIUnicharInputStream** aInstancePtrResult,
                          nsIInputStream* aStreamToWrap,
                          PRInt32 aBufferSize)
{
  // Create converter input stream
  UTF8InputStream* stream = new UTF8InputStream(aStreamToWrap, aBufferSize);
  if (!stream) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  void** result = (void **) aInstancePtrResult;
  return stream->QueryInterface(NS_GET_IID(nsIUnicharInputStream), result);
}
|