зеркало из https://github.com/mozilla/pjs.git
17275, 16285, 15204: Switch to using nsISaveAsCharset for both charset and entity conversion. r=nhotta
This commit is contained in:
Родитель
ebcfb0c284
Коммит
a6e83503f7
|
@ -29,18 +29,18 @@
|
||||||
|
|
||||||
|
|
||||||
#include "nsHTMLContentSinkStream.h"
|
#include "nsHTMLContentSinkStream.h"
|
||||||
#include "nsHTMLTokens.h"
|
#include "nsIParserNode.h"
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "nsString.h"
|
#include "nsString.h"
|
||||||
#include "nsIParser.h"
|
#include "nsIParser.h"
|
||||||
#include "nsHTMLEntities.h"
|
|
||||||
#include "nsCRT.h"
|
|
||||||
#include "nsIDocumentEncoder.h" // for output flags
|
|
||||||
|
|
||||||
#include "nsIUnicodeEncoder.h"
|
|
||||||
#include "nsICharsetAlias.h"
|
#include "nsICharsetAlias.h"
|
||||||
#include "nsIServiceManager.h"
|
#include "nsIServiceManager.h"
|
||||||
#include "nsICharsetConverterManager.h"
|
#include "nsISaveAsCharset.h"
|
||||||
|
#include "nsIEntityConverter.h"
|
||||||
|
#include "nsCRT.h"
|
||||||
|
#include "nsIDocumentEncoder.h" // for output flags
|
||||||
|
#include "nshtmlpars.h"
|
||||||
|
|
||||||
#include "nsIOutputStream.h"
|
#include "nsIOutputStream.h"
|
||||||
#include "nsFileStream.h"
|
#include "nsFileStream.h"
|
||||||
|
|
||||||
|
@ -49,8 +49,7 @@ static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||||
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
||||||
static NS_DEFINE_IID(kIHTMLContentSinkStreamIID, NS_IHTMLCONTENTSINKSTREAM_IID);
|
static NS_DEFINE_IID(kIHTMLContentSinkStreamIID, NS_IHTMLCONTENTSINKSTREAM_IID);
|
||||||
|
static NS_DEFINE_CID(kSaveAsCharsetCID, NS_SAVEASCHARSET_CID);
|
||||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
|
||||||
|
|
||||||
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
|
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
|
||||||
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
|
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
|
||||||
|
@ -245,56 +244,6 @@ NS_IMPL_ADDREF(nsHTMLContentSinkStream)
|
||||||
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
|
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Inits the encoder instance variable for the sink based on the charset
|
|
||||||
*
|
|
||||||
* @update gpk 4/21/99
|
|
||||||
* @param aCharset
|
|
||||||
* @return NS_xxx error result
|
|
||||||
*/
|
|
||||||
nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
|
|
||||||
{
|
|
||||||
|
|
||||||
nsresult res = NS_OK;
|
|
||||||
|
|
||||||
nsICharsetAlias* calias = nsnull;
|
|
||||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
|
||||||
kICharsetAliasIID,
|
|
||||||
(nsISupports**)&calias);
|
|
||||||
|
|
||||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
|
||||||
nsAutoString charsetName = aCharset;
|
|
||||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
|
||||||
{
|
|
||||||
res = calias->GetPreferred(aCharset, charsetName);
|
|
||||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
|
||||||
|
|
||||||
if(NS_FAILED(res))
|
|
||||||
{
|
|
||||||
// failed - unknown alias , fallback to ISO-8859-1
|
|
||||||
charsetName = "ISO-8859-1";
|
|
||||||
}
|
|
||||||
|
|
||||||
nsICharsetConverterManager * ccm = nsnull;
|
|
||||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
|
||||||
nsCOMTypeInfo<nsICharsetConverterManager>::GetIID(),
|
|
||||||
(nsISupports**)&ccm);
|
|
||||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
|
||||||
{
|
|
||||||
nsIUnicodeEncoder * encoder = nsnull;
|
|
||||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
|
||||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
|
||||||
{
|
|
||||||
NS_IF_RELEASE(mUnicodeEncoder);
|
|
||||||
mUnicodeEncoder = encoder;
|
|
||||||
}
|
|
||||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a content sink stream.
|
* Construct a content sink stream.
|
||||||
* @update gess7/7/98
|
* @update gess7/7/98
|
||||||
|
@ -309,11 +258,10 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream()
|
||||||
mHTMLStackPos = 0;
|
mHTMLStackPos = 0;
|
||||||
mColPos = 0;
|
mColPos = 0;
|
||||||
mIndent = 0;
|
mIndent = 0;
|
||||||
mUnicodeEncoder = nsnull;
|
|
||||||
mInBody = PR_FALSE;
|
mInBody = PR_FALSE;
|
||||||
mBuffer = nsnull;
|
mBuffer = nsnull;
|
||||||
mBufferSize=0;
|
mBufferSize = 0;
|
||||||
mBufferLength=0;
|
mBufferLength = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
NS_IMETHODIMP
|
NS_IMETHODIMP
|
||||||
|
@ -332,7 +280,7 @@ nsHTMLContentSinkStream::Initialize(nsIOutputStream* aOutStream,
|
||||||
mStream = aOutStream;
|
mStream = aOutStream;
|
||||||
mString = aOutString;
|
mString = aOutString;
|
||||||
if (aCharsetOverride != nsnull)
|
if (aCharsetOverride != nsnull)
|
||||||
mCharsetOverride = *aCharsetOverride;
|
mCharsetOverride.Assign(*aCharsetOverride);
|
||||||
|
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
@ -377,85 +325,89 @@ nsHTMLContentSinkStream::EndContext(PRInt32 aPosition)
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/*
|
* Initialize the Unicode encoder with our current mCharsetOverride.
|
||||||
* Entities are represented in the dom as single elements.
|
|
||||||
* Substitute them back into entity for (e.g. ´) here.
|
|
||||||
*/
|
*/
|
||||||
void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc,
|
NS_IMETHODIMP
|
||||||
nsString& aDst)
|
nsHTMLContentSinkStream::InitEncoder()
|
||||||
{
|
{
|
||||||
PRInt32 length = aSrc.Length();
|
nsAutoString charsetName = mCharsetOverride;
|
||||||
PRUnichar ch;
|
nsresult res;
|
||||||
|
nsICharsetAlias* calias = nsnull;
|
||||||
if (mUnicodeEncoder == nsnull)
|
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||||
InitEncoder("");
|
kICharsetAliasIID,
|
||||||
|
(nsISupports**)&calias);
|
||||||
if (length > 0)
|
|
||||||
{
|
NS_ASSERTION(nsnull != calias, "cannot find charset alias");
|
||||||
// Convert anything that maps to character entity
|
if(NS_SUCCEEDED(res) && (nsnull != calias))
|
||||||
// to the entity value
|
{
|
||||||
EnsureBufferSize(length);
|
res = calias->GetPreferred(mCharsetOverride, charsetName);
|
||||||
|
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||||
for (PRInt32 i = 0; i < length; i++)
|
|
||||||
{
|
|
||||||
ch = aSrc.CharAt(i);
|
|
||||||
|
|
||||||
const nsCString& entity = nsHTMLEntities::UnicodeToEntity(ch);
|
|
||||||
if (0 < entity.Length())
|
|
||||||
{
|
|
||||||
aDst.Append('&');
|
|
||||||
aDst.Append(entity);
|
|
||||||
aDst.Append(';');
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
aDst.Append(ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (NS_FAILED(res))
|
||||||
|
{
|
||||||
|
// failed - unknown alias , fallback to ISO-8859-1
|
||||||
|
charsetName = "ISO-8859-1";
|
||||||
|
}
|
||||||
|
|
||||||
|
res = nsComponentManager::CreateInstance(kSaveAsCharsetCID, NULL,
|
||||||
|
nsISaveAsCharset::GetIID(),
|
||||||
|
getter_AddRefs(mUnicodeEncoder));
|
||||||
|
if (NS_FAILED(res))
|
||||||
|
return res;
|
||||||
|
// SaveAsCharset requires a const char* in its first argument:
|
||||||
|
nsCAutoString charsetCString (charsetName);
|
||||||
|
res = mUnicodeEncoder->Init(charsetCString,
|
||||||
|
nsISaveAsCharset::attr_EntityBeforeCharsetConv
|
||||||
|
| nsISaveAsCharset::attr_FallbackDecimalNCR,
|
||||||
|
nsIEntityConverter::html40);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
void nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc)
|
* Use mUnicodeEncoder to encode to the buffer;
|
||||||
|
* this also encodes entities, so it's useful even for the default charset.
|
||||||
|
*
|
||||||
|
* @param aSrc - the string to be encoded.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc)
|
||||||
{
|
{
|
||||||
nsString htmlstr;
|
char *encodedBuffer = nsnull;
|
||||||
UnicodeToHTMLString(aSrc, htmlstr);
|
nsresult res;
|
||||||
|
|
||||||
NS_VERIFY(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized");
|
// Initialize the encoder if we haven't already
|
||||||
if (mUnicodeEncoder == nsnull)
|
if (!mUnicodeEncoder)
|
||||||
return;
|
InitEncoder();
|
||||||
|
|
||||||
PRInt32 length = htmlstr.Length();
|
// if (mBuffer)
|
||||||
nsresult result;
|
// nsAllocator::Free(mBuffer);
|
||||||
|
|
||||||
if (mUnicodeEncoder != nsnull && length > 0)
|
if (mUnicodeEncoder)
|
||||||
{
|
{
|
||||||
EnsureBufferSize(length);
|
// Call the converter to convert to the target charset.
|
||||||
mBufferLength = mBufferSize;
|
// Convert() takes a char* output param even though it's writing unicode.
|
||||||
|
res = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &encodedBuffer);
|
||||||
mUnicodeEncoder->Reset();
|
if (!NS_SUCCEEDED(res))
|
||||||
result = mUnicodeEncoder->Convert(htmlstr.GetUnicode(), &length,
|
|
||||||
mBuffer, &mBufferLength);
|
|
||||||
mBuffer[mBufferLength] = 0;
|
|
||||||
PRInt32 temp = mBufferLength;
|
|
||||||
|
|
||||||
if (NS_SUCCEEDED(result))
|
|
||||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// Do some conversions to make up for the unicode encoder's foibles:
|
|
||||||
PRInt32 nbsp = nsHTMLEntities::EntityToUnicode(nsCAutoString("nbsp"));
|
|
||||||
PRInt32 quot = nsHTMLEntities::EntityToUnicode(nsCAutoString("quot"));
|
|
||||||
for (PRInt32 i = 0; i < mBufferLength; i++)
|
|
||||||
{
|
{
|
||||||
if (mBuffer[i] == quot)
|
#ifdef DEBUG_akkana
|
||||||
mBuffer[i] = '"';
|
printf("Unicode convert didn't work!\n");
|
||||||
// I don't know why this nbsp mapping was here ...
|
|
||||||
else if (mBuffer[i] == nbsp)
|
|
||||||
mBuffer[i] = ' ';
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
encodedBuffer = aSrc.ToNewUTF8String();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef DEBUG_akkana
|
||||||
|
printf("Unicode convert didn't work!\n");
|
||||||
|
#endif
|
||||||
|
encodedBuffer = aSrc.ToNewCString();
|
||||||
|
}
|
||||||
|
mBufferLength = 0;
|
||||||
|
if (encodedBuffer) {
|
||||||
|
mBufferLength = nsCRT::strlen(encodedBuffer);
|
||||||
|
EnsureBufferSize(mBufferLength+1);
|
||||||
|
nsCRT::memcpy(mBuffer, encodedBuffer, mBufferLength+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,7 +426,7 @@ void nsHTMLContentSinkStream::Write(const nsString& aString)
|
||||||
// Now handle the stream case:
|
// Now handle the stream case:
|
||||||
nsOutputStream out(mStream);
|
nsOutputStream out(mStream);
|
||||||
|
|
||||||
// If a encoder is being used then convert first convert the input string
|
// If an encoder is being used then convert first convert the input string
|
||||||
if (mUnicodeEncoder)
|
if (mUnicodeEncoder)
|
||||||
{
|
{
|
||||||
EncodeToBuffer(aString);
|
EncodeToBuffer(aString);
|
||||||
|
@ -527,9 +479,10 @@ void nsHTMLContentSinkStream::Write(char aData)
|
||||||
* @param
|
* @param
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
|
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
|
||||||
NS_IF_RELEASE(mUnicodeEncoder);
|
{
|
||||||
if(mBuffer) delete [] mBuffer;
|
if (mBuffer)
|
||||||
|
nsAllocator::Free(mBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -984,8 +937,9 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode)
|
||||||
|
|
||||||
if (tag == eHTMLTag_body)
|
if (tag == eHTMLTag_body)
|
||||||
mInBody = PR_FALSE;
|
mInBody = PR_FALSE;
|
||||||
|
|
||||||
if ((mDoFormat && BreakAfterClose(tag)) || tag == eHTMLTag_html)
|
if ((mDoFormat && BreakAfterClose(tag))
|
||||||
|
|| tag == eHTMLTag_body || tag == eHTMLTag_html)
|
||||||
{
|
{
|
||||||
Write(NS_LINEBREAK);
|
Write(NS_LINEBREAK);
|
||||||
mColPos = 0;
|
mColPos = 0;
|
||||||
|
@ -1206,9 +1160,8 @@ nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||||
if (key.Equals("charset"))
|
if (key.Equals("charset"))
|
||||||
{
|
{
|
||||||
if (mCharsetOverride.Length() == 0)
|
if (mCharsetOverride.Length() == 0)
|
||||||
InitEncoder(value);
|
mCharsetOverride.Assign(value);
|
||||||
else
|
InitEncoder();
|
||||||
InitEncoder(mCharsetOverride);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,12 +39,10 @@
|
||||||
#ifndef NS_TXTCONTENTSINK_STREAM
|
#ifndef NS_TXTCONTENTSINK_STREAM
|
||||||
#define NS_TXTCONTENTSINK_STREAM
|
#define NS_TXTCONTENTSINK_STREAM
|
||||||
|
|
||||||
#include "nsIParserNode.h"
|
|
||||||
#include "nsIHTMLContentSink.h"
|
#include "nsIHTMLContentSink.h"
|
||||||
#include "nshtmlpars.h"
|
|
||||||
#include "nsHTMLTokens.h"
|
|
||||||
#include "nsParserCIID.h"
|
#include "nsParserCIID.h"
|
||||||
#include "nsCOMPtr.h"
|
#include "nsCOMPtr.h"
|
||||||
|
#include "nsHTMLTokens.h" // for eHTMLTags
|
||||||
|
|
||||||
#define NS_IHTMLCONTENTSINKSTREAM_IID \
|
#define NS_IHTMLCONTENTSINKSTREAM_IID \
|
||||||
{0xa39c6bff, 0x15f0, 0x11d2, \
|
{0xa39c6bff, 0x15f0, 0x11d2, \
|
||||||
|
@ -54,7 +52,8 @@
|
||||||
class ostream;
|
class ostream;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class nsIUnicodeEncoder;
|
class nsIParserNode;
|
||||||
|
class nsISaveAsCharset;
|
||||||
class nsIOutputStream;
|
class nsIOutputStream;
|
||||||
|
|
||||||
class nsIHTMLContentSinkStream : public nsIHTMLContentSink {
|
class nsIHTMLContentSinkStream : public nsIHTMLContentSink {
|
||||||
|
@ -149,10 +148,8 @@ protected:
|
||||||
void AddIndent();
|
void AddIndent();
|
||||||
void EnsureBufferSize(PRInt32 aNewSize);
|
void EnsureBufferSize(PRInt32 aNewSize);
|
||||||
|
|
||||||
nsresult InitEncoder(const nsString& aCharset);
|
|
||||||
|
|
||||||
void UnicodeToHTMLString(const nsString& aSrc, nsString& aDst);
|
|
||||||
void EncodeToBuffer(const nsString& aString);
|
void EncodeToBuffer(const nsString& aString);
|
||||||
|
NS_IMETHOD InitEncoder();
|
||||||
|
|
||||||
void Write(const nsString& aString);
|
void Write(const nsString& aString);
|
||||||
void Write(const char* aCharBuffer);
|
void Write(const char* aCharBuffer);
|
||||||
|
@ -180,8 +177,8 @@ protected:
|
||||||
PRBool mDoHeader;
|
PRBool mDoHeader;
|
||||||
PRBool mBodyOnly;
|
PRBool mBodyOnly;
|
||||||
|
|
||||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
nsCOMPtr<nsISaveAsCharset> mUnicodeEncoder;
|
||||||
nsString mCharsetOverride;
|
nsCAutoString mCharsetOverride;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -29,18 +29,18 @@
|
||||||
|
|
||||||
|
|
||||||
#include "nsHTMLContentSinkStream.h"
|
#include "nsHTMLContentSinkStream.h"
|
||||||
#include "nsHTMLTokens.h"
|
#include "nsIParserNode.h"
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include "nsString.h"
|
#include "nsString.h"
|
||||||
#include "nsIParser.h"
|
#include "nsIParser.h"
|
||||||
#include "nsHTMLEntities.h"
|
|
||||||
#include "nsCRT.h"
|
|
||||||
#include "nsIDocumentEncoder.h" // for output flags
|
|
||||||
|
|
||||||
#include "nsIUnicodeEncoder.h"
|
|
||||||
#include "nsICharsetAlias.h"
|
#include "nsICharsetAlias.h"
|
||||||
#include "nsIServiceManager.h"
|
#include "nsIServiceManager.h"
|
||||||
#include "nsICharsetConverterManager.h"
|
#include "nsISaveAsCharset.h"
|
||||||
|
#include "nsIEntityConverter.h"
|
||||||
|
#include "nsCRT.h"
|
||||||
|
#include "nsIDocumentEncoder.h" // for output flags
|
||||||
|
#include "nshtmlpars.h"
|
||||||
|
|
||||||
#include "nsIOutputStream.h"
|
#include "nsIOutputStream.h"
|
||||||
#include "nsFileStream.h"
|
#include "nsFileStream.h"
|
||||||
|
|
||||||
|
@ -49,8 +49,7 @@ static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||||
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
||||||
static NS_DEFINE_IID(kIHTMLContentSinkStreamIID, NS_IHTMLCONTENTSINKSTREAM_IID);
|
static NS_DEFINE_IID(kIHTMLContentSinkStreamIID, NS_IHTMLCONTENTSINKSTREAM_IID);
|
||||||
|
static NS_DEFINE_CID(kSaveAsCharsetCID, NS_SAVEASCHARSET_CID);
|
||||||
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
|
||||||
|
|
||||||
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
|
static char* gHeaderComment = "<!-- This page was created by the Gecko output system. -->";
|
||||||
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
|
static char* gDocTypeHeader = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">";
|
||||||
|
@ -245,56 +244,6 @@ NS_IMPL_ADDREF(nsHTMLContentSinkStream)
|
||||||
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
|
NS_IMPL_RELEASE(nsHTMLContentSinkStream)
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Inits the encoder instance variable for the sink based on the charset
|
|
||||||
*
|
|
||||||
* @update gpk 4/21/99
|
|
||||||
* @param aCharset
|
|
||||||
* @return NS_xxx error result
|
|
||||||
*/
|
|
||||||
nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
|
|
||||||
{
|
|
||||||
|
|
||||||
nsresult res = NS_OK;
|
|
||||||
|
|
||||||
nsICharsetAlias* calias = nsnull;
|
|
||||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
|
||||||
kICharsetAliasIID,
|
|
||||||
(nsISupports**)&calias);
|
|
||||||
|
|
||||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
|
||||||
nsAutoString charsetName = aCharset;
|
|
||||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
|
||||||
{
|
|
||||||
res = calias->GetPreferred(aCharset, charsetName);
|
|
||||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
|
||||||
|
|
||||||
if(NS_FAILED(res))
|
|
||||||
{
|
|
||||||
// failed - unknown alias , fallback to ISO-8859-1
|
|
||||||
charsetName = "ISO-8859-1";
|
|
||||||
}
|
|
||||||
|
|
||||||
nsICharsetConverterManager * ccm = nsnull;
|
|
||||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
|
||||||
nsCOMTypeInfo<nsICharsetConverterManager>::GetIID(),
|
|
||||||
(nsISupports**)&ccm);
|
|
||||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
|
||||||
{
|
|
||||||
nsIUnicodeEncoder * encoder = nsnull;
|
|
||||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
|
||||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
|
||||||
{
|
|
||||||
NS_IF_RELEASE(mUnicodeEncoder);
|
|
||||||
mUnicodeEncoder = encoder;
|
|
||||||
}
|
|
||||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a content sink stream.
|
* Construct a content sink stream.
|
||||||
* @update gess7/7/98
|
* @update gess7/7/98
|
||||||
|
@ -309,11 +258,10 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream()
|
||||||
mHTMLStackPos = 0;
|
mHTMLStackPos = 0;
|
||||||
mColPos = 0;
|
mColPos = 0;
|
||||||
mIndent = 0;
|
mIndent = 0;
|
||||||
mUnicodeEncoder = nsnull;
|
|
||||||
mInBody = PR_FALSE;
|
mInBody = PR_FALSE;
|
||||||
mBuffer = nsnull;
|
mBuffer = nsnull;
|
||||||
mBufferSize=0;
|
mBufferSize = 0;
|
||||||
mBufferLength=0;
|
mBufferLength = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
NS_IMETHODIMP
|
NS_IMETHODIMP
|
||||||
|
@ -332,7 +280,7 @@ nsHTMLContentSinkStream::Initialize(nsIOutputStream* aOutStream,
|
||||||
mStream = aOutStream;
|
mStream = aOutStream;
|
||||||
mString = aOutString;
|
mString = aOutString;
|
||||||
if (aCharsetOverride != nsnull)
|
if (aCharsetOverride != nsnull)
|
||||||
mCharsetOverride = *aCharsetOverride;
|
mCharsetOverride.Assign(*aCharsetOverride);
|
||||||
|
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
@ -377,85 +325,89 @@ nsHTMLContentSinkStream::EndContext(PRInt32 aPosition)
|
||||||
return NS_OK;
|
return NS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/*
|
* Initialize the Unicode encoder with our current mCharsetOverride.
|
||||||
* Entities are represented in the dom as single elements.
|
|
||||||
* Substitute them back into entity for (e.g. ´) here.
|
|
||||||
*/
|
*/
|
||||||
void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc,
|
NS_IMETHODIMP
|
||||||
nsString& aDst)
|
nsHTMLContentSinkStream::InitEncoder()
|
||||||
{
|
{
|
||||||
PRInt32 length = aSrc.Length();
|
nsAutoString charsetName = mCharsetOverride;
|
||||||
PRUnichar ch;
|
nsresult res;
|
||||||
|
nsICharsetAlias* calias = nsnull;
|
||||||
if (mUnicodeEncoder == nsnull)
|
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||||
InitEncoder("");
|
kICharsetAliasIID,
|
||||||
|
(nsISupports**)&calias);
|
||||||
if (length > 0)
|
|
||||||
{
|
NS_ASSERTION(nsnull != calias, "cannot find charset alias");
|
||||||
// Convert anything that maps to character entity
|
if(NS_SUCCEEDED(res) && (nsnull != calias))
|
||||||
// to the entity value
|
{
|
||||||
EnsureBufferSize(length);
|
res = calias->GetPreferred(mCharsetOverride, charsetName);
|
||||||
|
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||||
for (PRInt32 i = 0; i < length; i++)
|
|
||||||
{
|
|
||||||
ch = aSrc.CharAt(i);
|
|
||||||
|
|
||||||
const nsCString& entity = nsHTMLEntities::UnicodeToEntity(ch);
|
|
||||||
if (0 < entity.Length())
|
|
||||||
{
|
|
||||||
aDst.Append('&');
|
|
||||||
aDst.Append(entity);
|
|
||||||
aDst.Append(';');
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
aDst.Append(ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (NS_FAILED(res))
|
||||||
|
{
|
||||||
|
// failed - unknown alias , fallback to ISO-8859-1
|
||||||
|
charsetName = "ISO-8859-1";
|
||||||
|
}
|
||||||
|
|
||||||
|
res = nsComponentManager::CreateInstance(kSaveAsCharsetCID, NULL,
|
||||||
|
nsISaveAsCharset::GetIID(),
|
||||||
|
getter_AddRefs(mUnicodeEncoder));
|
||||||
|
if (NS_FAILED(res))
|
||||||
|
return res;
|
||||||
|
// SaveAsCharset requires a const char* in its first argument:
|
||||||
|
nsCAutoString charsetCString (charsetName);
|
||||||
|
res = mUnicodeEncoder->Init(charsetCString,
|
||||||
|
nsISaveAsCharset::attr_EntityBeforeCharsetConv
|
||||||
|
| nsISaveAsCharset::attr_FallbackDecimalNCR,
|
||||||
|
nsIEntityConverter::html40);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
void nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc)
|
* Use mUnicodeEncoder to encode to the buffer;
|
||||||
|
* this also encodes entities, so it's useful even for the default charset.
|
||||||
|
*
|
||||||
|
* @param aSrc - the string to be encoded.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc)
|
||||||
{
|
{
|
||||||
nsString htmlstr;
|
char *encodedBuffer = nsnull;
|
||||||
UnicodeToHTMLString(aSrc, htmlstr);
|
nsresult res;
|
||||||
|
|
||||||
NS_VERIFY(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized");
|
// Initialize the encoder if we haven't already
|
||||||
if (mUnicodeEncoder == nsnull)
|
if (!mUnicodeEncoder)
|
||||||
return;
|
InitEncoder();
|
||||||
|
|
||||||
PRInt32 length = htmlstr.Length();
|
// if (mBuffer)
|
||||||
nsresult result;
|
// nsAllocator::Free(mBuffer);
|
||||||
|
|
||||||
if (mUnicodeEncoder != nsnull && length > 0)
|
if (mUnicodeEncoder)
|
||||||
{
|
{
|
||||||
EnsureBufferSize(length);
|
// Call the converter to convert to the target charset.
|
||||||
mBufferLength = mBufferSize;
|
// Convert() takes a char* output param even though it's writing unicode.
|
||||||
|
res = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &encodedBuffer);
|
||||||
mUnicodeEncoder->Reset();
|
if (!NS_SUCCEEDED(res))
|
||||||
result = mUnicodeEncoder->Convert(htmlstr.GetUnicode(), &length,
|
|
||||||
mBuffer, &mBufferLength);
|
|
||||||
mBuffer[mBufferLength] = 0;
|
|
||||||
PRInt32 temp = mBufferLength;
|
|
||||||
|
|
||||||
if (NS_SUCCEEDED(result))
|
|
||||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// Do some conversions to make up for the unicode encoder's foibles:
|
|
||||||
PRInt32 nbsp = nsHTMLEntities::EntityToUnicode(nsCAutoString("nbsp"));
|
|
||||||
PRInt32 quot = nsHTMLEntities::EntityToUnicode(nsCAutoString("quot"));
|
|
||||||
for (PRInt32 i = 0; i < mBufferLength; i++)
|
|
||||||
{
|
{
|
||||||
if (mBuffer[i] == quot)
|
#ifdef DEBUG_akkana
|
||||||
mBuffer[i] = '"';
|
printf("Unicode convert didn't work!\n");
|
||||||
// I don't know why this nbsp mapping was here ...
|
|
||||||
else if (mBuffer[i] == nbsp)
|
|
||||||
mBuffer[i] = ' ';
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
encodedBuffer = aSrc.ToNewUTF8String();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifdef DEBUG_akkana
|
||||||
|
printf("Unicode convert didn't work!\n");
|
||||||
|
#endif
|
||||||
|
encodedBuffer = aSrc.ToNewCString();
|
||||||
|
}
|
||||||
|
mBufferLength = 0;
|
||||||
|
if (encodedBuffer) {
|
||||||
|
mBufferLength = nsCRT::strlen(encodedBuffer);
|
||||||
|
EnsureBufferSize(mBufferLength+1);
|
||||||
|
nsCRT::memcpy(mBuffer, encodedBuffer, mBufferLength+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -474,7 +426,7 @@ void nsHTMLContentSinkStream::Write(const nsString& aString)
|
||||||
// Now handle the stream case:
|
// Now handle the stream case:
|
||||||
nsOutputStream out(mStream);
|
nsOutputStream out(mStream);
|
||||||
|
|
||||||
// If a encoder is being used then convert first convert the input string
|
// If an encoder is being used then convert first convert the input string
|
||||||
if (mUnicodeEncoder)
|
if (mUnicodeEncoder)
|
||||||
{
|
{
|
||||||
EncodeToBuffer(aString);
|
EncodeToBuffer(aString);
|
||||||
|
@ -527,9 +479,10 @@ void nsHTMLContentSinkStream::Write(char aData)
|
||||||
* @param
|
* @param
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
nsHTMLContentSinkStream::~nsHTMLContentSinkStream() {
|
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
|
||||||
NS_IF_RELEASE(mUnicodeEncoder);
|
{
|
||||||
if(mBuffer) delete [] mBuffer;
|
if (mBuffer)
|
||||||
|
nsAllocator::Free(mBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -984,8 +937,9 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode)
|
||||||
|
|
||||||
if (tag == eHTMLTag_body)
|
if (tag == eHTMLTag_body)
|
||||||
mInBody = PR_FALSE;
|
mInBody = PR_FALSE;
|
||||||
|
|
||||||
if ((mDoFormat && BreakAfterClose(tag)) || tag == eHTMLTag_html)
|
if ((mDoFormat && BreakAfterClose(tag))
|
||||||
|
|| tag == eHTMLTag_body || tag == eHTMLTag_html)
|
||||||
{
|
{
|
||||||
Write(NS_LINEBREAK);
|
Write(NS_LINEBREAK);
|
||||||
mColPos = 0;
|
mColPos = 0;
|
||||||
|
@ -1206,9 +1160,8 @@ nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||||
if (key.Equals("charset"))
|
if (key.Equals("charset"))
|
||||||
{
|
{
|
||||||
if (mCharsetOverride.Length() == 0)
|
if (mCharsetOverride.Length() == 0)
|
||||||
InitEncoder(value);
|
mCharsetOverride.Assign(value);
|
||||||
else
|
InitEncoder();
|
||||||
InitEncoder(mCharsetOverride);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,12 +39,10 @@
|
||||||
#ifndef NS_TXTCONTENTSINK_STREAM
|
#ifndef NS_TXTCONTENTSINK_STREAM
|
||||||
#define NS_TXTCONTENTSINK_STREAM
|
#define NS_TXTCONTENTSINK_STREAM
|
||||||
|
|
||||||
#include "nsIParserNode.h"
|
|
||||||
#include "nsIHTMLContentSink.h"
|
#include "nsIHTMLContentSink.h"
|
||||||
#include "nshtmlpars.h"
|
|
||||||
#include "nsHTMLTokens.h"
|
|
||||||
#include "nsParserCIID.h"
|
#include "nsParserCIID.h"
|
||||||
#include "nsCOMPtr.h"
|
#include "nsCOMPtr.h"
|
||||||
|
#include "nsHTMLTokens.h" // for eHTMLTags
|
||||||
|
|
||||||
#define NS_IHTMLCONTENTSINKSTREAM_IID \
|
#define NS_IHTMLCONTENTSINKSTREAM_IID \
|
||||||
{0xa39c6bff, 0x15f0, 0x11d2, \
|
{0xa39c6bff, 0x15f0, 0x11d2, \
|
||||||
|
@ -54,7 +52,8 @@
|
||||||
class ostream;
|
class ostream;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
class nsIUnicodeEncoder;
|
class nsIParserNode;
|
||||||
|
class nsISaveAsCharset;
|
||||||
class nsIOutputStream;
|
class nsIOutputStream;
|
||||||
|
|
||||||
class nsIHTMLContentSinkStream : public nsIHTMLContentSink {
|
class nsIHTMLContentSinkStream : public nsIHTMLContentSink {
|
||||||
|
@ -149,10 +148,8 @@ protected:
|
||||||
void AddIndent();
|
void AddIndent();
|
||||||
void EnsureBufferSize(PRInt32 aNewSize);
|
void EnsureBufferSize(PRInt32 aNewSize);
|
||||||
|
|
||||||
nsresult InitEncoder(const nsString& aCharset);
|
|
||||||
|
|
||||||
void UnicodeToHTMLString(const nsString& aSrc, nsString& aDst);
|
|
||||||
void EncodeToBuffer(const nsString& aString);
|
void EncodeToBuffer(const nsString& aString);
|
||||||
|
NS_IMETHOD InitEncoder();
|
||||||
|
|
||||||
void Write(const nsString& aString);
|
void Write(const nsString& aString);
|
||||||
void Write(const char* aCharBuffer);
|
void Write(const char* aCharBuffer);
|
||||||
|
@ -180,8 +177,8 @@ protected:
|
||||||
PRBool mDoHeader;
|
PRBool mDoHeader;
|
||||||
PRBool mBodyOnly;
|
PRBool mBodyOnly;
|
||||||
|
|
||||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
nsCOMPtr<nsISaveAsCharset> mUnicodeEncoder;
|
||||||
nsString mCharsetOverride;
|
nsCAutoString mCharsetOverride;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче