зеркало из https://github.com/mozilla/pjs.git
Added support to use the specified document charset when encoding a
document to HTML or Text. The charset information is first encoded in XIF, and that information is then used when interpreting the Unicode for output. Added support to output character entity information, which should address bug 4709.
This commit is contained in:
Родитель
09c3f2e1b1
Коммит
65091e3762
|
@ -36,6 +36,14 @@
|
|||
#include "nsIParser.h"
|
||||
#include "nsHTMLEntities.h"
|
||||
|
||||
|
||||
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
||||
|
@ -343,6 +351,58 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
|
|||
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Inits the encoder instance variable for the sink based on the charset
|
||||
*
|
||||
* @update gpk 4/21/99
|
||||
* @param aCharset
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res))
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
(nsISupports**)&ccm);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeEncoder * encoder = nsnull;
|
||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeEncoder);
|
||||
mUnicodeEncoder = encoder;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Construct a content sink stream.
|
||||
* @update gess7/7/98
|
||||
|
@ -361,6 +421,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
|
|||
mDoHeader = aDoHeader;
|
||||
mBuffer = nsnull;
|
||||
mBufferSize = 0;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -381,6 +442,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm
|
|||
mDoHeader = aDoHeader;
|
||||
mBuffer = nsnull;
|
||||
mBufferSize = 0;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
|
||||
|
@ -443,9 +505,16 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
const char* entity = nsnull;
|
||||
PRUint32 offset = 0;
|
||||
PRUint32 addedLength = 0;
|
||||
nsAutoString data;
|
||||
|
||||
|
||||
if (mUnicodeEncoder == nsnull)
|
||||
InitEncoder("");
|
||||
|
||||
if (length > 0)
|
||||
{
|
||||
// Step 1. Convert anything that maps to character entity to
|
||||
// the entity value
|
||||
EnsureBufferSize(length);
|
||||
for (PRInt32 i = 0; i < length; i++)
|
||||
{
|
||||
|
@ -454,29 +523,31 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
entity = UnicodeToEntity(ch);
|
||||
if (entity)
|
||||
{
|
||||
PRUint32 size = strlen(entity);
|
||||
addedLength += size;
|
||||
EnsureBufferSize(length+addedLength+1);
|
||||
mBuffer[offset++] = '&';
|
||||
mBuffer[offset] = 0;
|
||||
strcat(mBuffer,entity);
|
||||
|
||||
PRUint32 temp = offset + size;
|
||||
while (offset < temp)
|
||||
{
|
||||
mBuffer[offset] = tolower(mBuffer[offset]);
|
||||
offset++;
|
||||
}
|
||||
mBuffer[offset++] = ';';
|
||||
mBuffer[offset] = 0;
|
||||
nsAutoString temp(entity);
|
||||
|
||||
temp.ToLowerCase();
|
||||
data.Append('&');
|
||||
data.Append(temp);
|
||||
data.Append(';');
|
||||
}
|
||||
else if (ch < 128)
|
||||
else
|
||||
{
|
||||
mBuffer[offset++] = (unsigned char)ch;
|
||||
mBuffer[offset] = 0;
|
||||
data.Append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2. Run the result through the converter
|
||||
length = data.Length();
|
||||
EnsureBufferSize(length);
|
||||
PRInt32 bufferLength = mBufferSize;
|
||||
|
||||
mUnicodeEncoder->Reset();
|
||||
nsresult result = mUnicodeEncoder->Convert(data, &length, mBuffer, &bufferLength);
|
||||
mBuffer[bufferLength] = 0;
|
||||
PRInt32 temp = bufferLength;
|
||||
if (NS_SUCCEEDED(result))
|
||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -487,6 +558,7 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
* @return
|
||||
*/
|
||||
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
{
  // The output stream is owned by whoever handed it to us; just drop
  // our pointer to it.
  mOutput = 0;

  // Give up our reference to the unicode encoder, if we hold one.
  NS_IF_RELEASE(mUnicodeEncoder);

  // NOTE(review): mBuffer is not freed in this destructor - confirm that
  // it is released elsewhere.
}
|
||||
|
||||
|
@ -526,8 +598,8 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream
|
|||
key.ToUpperCase();
|
||||
|
||||
|
||||
|
||||
key.ToCString(mBuffer,sizeof(gBuffer)-1);
|
||||
EnsureBufferSize(key.Length());
|
||||
key.ToCString(mBuffer,mBufferSize);
|
||||
|
||||
aStream << " " << mBuffer << char(kEqual);
|
||||
mColPos += 1 + strlen(mBuffer) + 1;
|
||||
|
@ -993,7 +1065,14 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){
|
|||
AddStartTag(aNode,aStream);
|
||||
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
|
||||
}
|
||||
if (type == eHTMLTag_text)
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
const nsString& entity = aNode.GetText();
|
||||
UnicodeToHTMLString(entity);
|
||||
aStream << '&' << mBuffer << ';';
|
||||
mColPos += entity.Length() + 2;
|
||||
}
|
||||
else if (type == eHTMLTag_text)
|
||||
{
|
||||
const nsString& text = aNode.GetText();
|
||||
if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE)
|
||||
|
@ -1136,9 +1215,25 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
|
|||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
if(mOutput) {
|
||||
AddStartTag(aNode,*mOutput);
|
||||
// eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
||||
if(mOutput)
|
||||
{
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
InitEncoder(value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
AddStartTag(aNode,*mOutput);
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
|
|
@ -53,6 +53,8 @@
|
|||
class ostream;
|
||||
#endif
|
||||
|
||||
class nsIUnicodeEncoder;
|
||||
|
||||
class nsHTMLContentSinkStream : public nsIHTMLContentSink {
|
||||
public:
|
||||
|
||||
|
@ -135,6 +137,7 @@ protected:
|
|||
void UnicodeToHTMLString(const nsString& aSrc);
|
||||
|
||||
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
|
||||
|
||||
|
@ -153,6 +156,8 @@ protected:
|
|||
|
||||
char* mBuffer;
|
||||
PRInt32 mBufferSize;
|
||||
|
||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
||||
};
|
||||
|
||||
extern NS_HTMLPARS nsresult
|
||||
|
|
|
@ -33,6 +33,12 @@
|
|||
#include "nsString.h"
|
||||
#include "nsIParser.h"
|
||||
#include "nsHTMLEntities.h"
|
||||
#include "nsXIFDTD.h"
|
||||
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||
|
@ -44,6 +50,55 @@ static PRBool IsInline(eHTMLTags aTag);
|
|||
static PRBool IsBlockLevel(eHTMLTags aTag);
|
||||
|
||||
|
||||
/**
|
||||
* Inits the encoder instance variable for the sink based on the charset
|
||||
*
|
||||
* @update gpk 4/21/99
|
||||
* @param aCharset
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res))
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
(nsISupports**)&ccm);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeEncoder * encoder = nsnull;
|
||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeEncoder);
|
||||
mUnicodeEncoder = encoder;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -117,6 +172,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() {
|
|||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBuffer = nsnull;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -133,6 +189,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
|
|||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBuffer = nsnull;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
|
||||
|
@ -145,6 +202,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
|
|||
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
{
  // Give up our reference to the unicode encoder, if we hold one.
  NS_IF_RELEASE(mUnicodeEncoder);

  // The conversion buffer is ours, so free it.
  delete [] mBuffer;

  // The output stream is owned by whoever handed it to us; just drop
  // our pointer to it.
  mOutput = 0;
}
|
||||
|
||||
|
||||
|
@ -409,42 +467,40 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize)
|
|||
}
|
||||
|
||||
|
||||
|
||||
void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
|
||||
{
|
||||
|
||||
|
||||
#define CH_NBSP 160
|
||||
#define CH_QUOT 34
|
||||
#define CH_AMP 38
|
||||
#define CH_LT 60
|
||||
#define CH_GT 62
|
||||
|
||||
PRInt32 length = aSrc.Length();
|
||||
PRUnichar ch;
|
||||
const char* entity = nsnull;
|
||||
PRUint32 offset = 0;
|
||||
PRUint32 addedLength = 0;
|
||||
nsresult result;
|
||||
PRInt32 bufferLength;
|
||||
|
||||
if (mUnicodeEncoder == nsnull)
|
||||
InitEncoder("");
|
||||
|
||||
if (length > 0)
|
||||
{
|
||||
EnsureBufferSize(length);
|
||||
for (PRInt32 i = 0; i < length; i++)
|
||||
{
|
||||
ch = aSrc.CharAt(i);
|
||||
switch (ch)
|
||||
{
|
||||
case CH_QUOT: ch = '"'; break;
|
||||
case CH_AMP: ch = '&'; break;
|
||||
case CH_GT: ch = '>'; break;
|
||||
case CH_LT: ch = '<'; break;
|
||||
case CH_NBSP: ch = ' '; break;
|
||||
}
|
||||
bufferLength = mBufferSize;
|
||||
|
||||
mUnicodeEncoder->Reset();
|
||||
result = mUnicodeEncoder->Convert(aSrc, &length, mBuffer, &bufferLength);
|
||||
mBuffer[bufferLength] = 0;
|
||||
PRInt32 temp = bufferLength;
|
||||
if (NS_SUCCEEDED(result))
|
||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
||||
|
||||
if (ch < 128)
|
||||
{
|
||||
mBuffer[offset++] = (unsigned char)ch;
|
||||
mBuffer[offset] = 0;
|
||||
}
|
||||
|
||||
for (PRInt32 i = 0; i < bufferLength; i++)
|
||||
{
|
||||
if (mBuffer[i] == char(CH_NBSP))
|
||||
mBuffer[i] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -483,6 +539,18 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
|
|||
mStrBuffer.Append(mBuffer);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
const nsString& text = aNode.GetText();
|
||||
UnicodeToTXTString(text);
|
||||
PRInt32 entity = NS_EntityToUnicode(mBuffer);
|
||||
if (entity < 256)
|
||||
{
|
||||
char ch = (char)entity;
|
||||
aStream << ch;
|
||||
mColPos++;
|
||||
}
|
||||
}
|
||||
else if (type == eHTMLTag_whitespace)
|
||||
{
|
||||
if (PR_TRUE)
|
||||
|
@ -551,6 +619,18 @@ NS_IMETHODIMP
|
|||
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
InitEncoder(value);
|
||||
}
|
||||
}
|
||||
|
||||
if (type == eHTMLTag_body)
|
||||
mDoOutput = PR_TRUE;
|
||||
|
|
|
@ -51,6 +51,9 @@
|
|||
class ostream;
|
||||
#endif
|
||||
|
||||
|
||||
class nsIUnicodeEncoder;
|
||||
|
||||
class nsHTMLToTXTSinkStream : public nsIHTMLContentSink {
|
||||
public:
|
||||
|
||||
|
@ -117,6 +120,7 @@ protected:
|
|||
|
||||
void EnsureBufferSize(PRInt32 aNewSize);
|
||||
void UnicodeToTXTString(const nsString& aSrc);
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
|
||||
protected:
|
||||
|
@ -127,7 +131,8 @@ protected:
|
|||
char* mBuffer;
|
||||
PRInt32 mBufferSize;
|
||||
|
||||
nsString mStrBuffer;
|
||||
nsString mStrBuffer;
|
||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -43,6 +43,8 @@ static NS_DEFINE_IID(kClassIID, NS_XIF_DTD_IID);
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
static const char* kXIFDocHeader= "<!DOCTYPE xif>";
|
||||
static const char* kXIFDocInfo= "document_info";
|
||||
static const char* kXIFCharset= "charset";
|
||||
|
||||
|
||||
struct nsXIFTagEntry {
|
||||
|
@ -73,7 +75,10 @@ nsXIFTagEntry gXIFTagTable[] =
|
|||
{"css_stylerule", eXIFTag_css_stylerule},
|
||||
{"css_stylesheet", eXIFTag_css_stylesheet},
|
||||
|
||||
{"document_info", eXIFTag_document_info},
|
||||
|
||||
{"encode", eXIFTag_encode},
|
||||
{"entity", eXIFTag_entity},
|
||||
|
||||
{"import", eXIFTag_import},
|
||||
|
||||
|
@ -343,6 +348,7 @@ nsXIFDTD::nsXIFDTD() : nsIDTD(){
|
|||
mInContent=PR_FALSE;
|
||||
mLowerCaseAttributes=PR_TRUE;
|
||||
mLowerCaseTags=PR_TRUE;
|
||||
mCharset = "";
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -395,15 +401,38 @@ eAutoDetectResult nsXIFDTD::CanParse(nsString& aContentType, nsString& aCommand,
|
|||
if(aContentType.Equals(kXIFTextContentType)){
|
||||
result=ePrimaryDetect;
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
if(kNotFound!=aBuffer.Find(kXIFDocHeader)) {
|
||||
PRInt32 offset = aBuffer.Find("<section>");
|
||||
if (offset != -1)
|
||||
aBuffer.Cut(0,offset);
|
||||
aContentType= kXIFTextContentType;
|
||||
result=ePrimaryDetect;
|
||||
}
|
||||
}
|
||||
|
||||
nsString charset ="ISO-8859-1";
|
||||
PRInt32 offset;
|
||||
offset = aBuffer.Find(kXIFDocInfo);
|
||||
if(kNotFound!=offset)
|
||||
{
|
||||
offset = aBuffer.Find(kXIFCharset);
|
||||
if (kNotFound!=offset)
|
||||
{
|
||||
PRInt32 start = aBuffer.Find('"',offset);
|
||||
PRInt32 end = aBuffer.Find('"',start+1);
|
||||
|
||||
if ((start != kNotFound) && (end != kNotFound))
|
||||
{
|
||||
charset = "";
|
||||
for (PRInt32 i = start+1; i < end; i++)
|
||||
{
|
||||
PRUnichar ch = aBuffer[i];
|
||||
charset.Append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
mCharset = charset;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -638,6 +667,11 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
|
|||
result = OpenContainer(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_entity:
|
||||
StartTopOfStack();
|
||||
ProcessEntityTag(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_content:
|
||||
StartTopOfStack();
|
||||
mInContent = PR_TRUE;
|
||||
|
@ -647,6 +681,10 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
|
|||
ProcessEncodeTag(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_document_info:
|
||||
ProcessDocumentInfoTag(node);
|
||||
break;
|
||||
|
||||
|
||||
case eXIFTag_attr:
|
||||
AddAttribute(node);
|
||||
|
@ -1355,8 +1393,8 @@ void nsXIFDTD::BeginStartTag(const nsIParserNode& aNode)
|
|||
if (type == eXIFTag_container)
|
||||
PushHTMLTag(tag,tagName);
|
||||
|
||||
CToken* token = new CStartToken(tagName);
|
||||
nsCParserNode* node = new nsCParserNode(token);
|
||||
// CToken* token = new CStartToken(tagName);
|
||||
// nsCParserNode* node = new nsCParserNode(token);
|
||||
PushNodeAndToken(tagName);
|
||||
break;
|
||||
}
|
||||
|
@ -1629,6 +1667,38 @@ void nsXIFDTD::ProcessEncodeTag(const nsIParserNode& aNode)
|
|||
}
|
||||
|
||||
|
||||
void nsXIFDTD::ProcessEntityTag(const nsIParserNode& aNode)
|
||||
{
|
||||
nsString value;
|
||||
|
||||
if (GetAttribute(aNode,nsString("value"),value))
|
||||
{
|
||||
CEntityToken* entity = new CEntityToken(value);
|
||||
nsCParserNode node((CToken*)entity);
|
||||
mSink->AddLeaf(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nsXIFDTD::ProcessDocumentInfoTag(const nsIParserNode& aNode)
|
||||
{
|
||||
nsString value;
|
||||
nsString key("charset");
|
||||
|
||||
if (GetAttribute(aNode,key,value))
|
||||
{
|
||||
PushNodeAndToken(nsString("XIF_DOC_INFO"));
|
||||
CAttributeToken* attribute = new CAttributeToken(key,value);
|
||||
nsIParserNode* top = PeekNode();
|
||||
if (top != nsnull)
|
||||
((nsCParserNode*)top)->AddAttribute(attribute);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*** CSS Methods ****/
|
||||
|
||||
void nsXIFDTD::BeginCSSStyleSheet(const nsIParserNode& aNode)
|
||||
|
|
|
@ -71,7 +71,9 @@ enum eXIFTags
|
|||
eXIFTag_css_stylesheet,
|
||||
|
||||
eXIFTag_doctype,
|
||||
eXIFTag_encode,
|
||||
eXIFTag_document_info,
|
||||
eXIFTag_encode,
|
||||
eXIFTag_entity,
|
||||
eXIFTag_import,
|
||||
eXIFTag_leaf,
|
||||
eXIFTag_link,
|
||||
|
@ -490,6 +492,8 @@ private:
|
|||
private:
|
||||
|
||||
void ProcessEncodeTag(const nsIParserNode& aNode);
|
||||
void ProcessEntityTag(const nsIParserNode& aNode);
|
||||
void ProcessDocumentInfoTag(const nsIParserNode& aNode);
|
||||
|
||||
void BeginCSSStyleSheet(const nsIParserNode& aNode);
|
||||
void EndCSSStyleSheet(const nsIParserNode& aNode);
|
||||
|
@ -556,6 +560,7 @@ protected:
|
|||
PRBool mLowerCaseTags;
|
||||
PRBool mLowerCaseAttributes;
|
||||
nsITokenizer* mTokenizer;
|
||||
nsString mCharset;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -36,6 +36,14 @@
|
|||
#include "nsIParser.h"
|
||||
#include "nsHTMLEntities.h"
|
||||
|
||||
|
||||
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||
static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID);
|
||||
|
@ -343,6 +351,58 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult,
|
|||
return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Inits the encoder instance variable for the sink based on the charset
|
||||
*
|
||||
* @update gpk 4/21/99
|
||||
* @param aCharset
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res))
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
(nsISupports**)&ccm);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeEncoder * encoder = nsnull;
|
||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeEncoder);
|
||||
mUnicodeEncoder = encoder;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Construct a content sink stream.
|
||||
* @update gess7/7/98
|
||||
|
@ -361,6 +421,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHead
|
|||
mDoHeader = aDoHeader;
|
||||
mBuffer = nsnull;
|
||||
mBufferSize = 0;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -381,6 +442,7 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm
|
|||
mDoHeader = aDoHeader;
|
||||
mBuffer = nsnull;
|
||||
mBufferSize = 0;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
|
||||
|
@ -443,9 +505,16 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
const char* entity = nsnull;
|
||||
PRUint32 offset = 0;
|
||||
PRUint32 addedLength = 0;
|
||||
nsAutoString data;
|
||||
|
||||
|
||||
if (mUnicodeEncoder == nsnull)
|
||||
InitEncoder("");
|
||||
|
||||
if (length > 0)
|
||||
{
|
||||
// Step 1. Convert anything that maps to character entity to
|
||||
// the entity value
|
||||
EnsureBufferSize(length);
|
||||
for (PRInt32 i = 0; i < length; i++)
|
||||
{
|
||||
|
@ -454,29 +523,31 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
entity = UnicodeToEntity(ch);
|
||||
if (entity)
|
||||
{
|
||||
PRUint32 size = strlen(entity);
|
||||
addedLength += size;
|
||||
EnsureBufferSize(length+addedLength+1);
|
||||
mBuffer[offset++] = '&';
|
||||
mBuffer[offset] = 0;
|
||||
strcat(mBuffer,entity);
|
||||
|
||||
PRUint32 temp = offset + size;
|
||||
while (offset < temp)
|
||||
{
|
||||
mBuffer[offset] = tolower(mBuffer[offset]);
|
||||
offset++;
|
||||
}
|
||||
mBuffer[offset++] = ';';
|
||||
mBuffer[offset] = 0;
|
||||
nsAutoString temp(entity);
|
||||
|
||||
temp.ToLowerCase();
|
||||
data.Append('&');
|
||||
data.Append(temp);
|
||||
data.Append(';');
|
||||
}
|
||||
else if (ch < 128)
|
||||
else
|
||||
{
|
||||
mBuffer[offset++] = (unsigned char)ch;
|
||||
mBuffer[offset] = 0;
|
||||
data.Append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2. Run the result through the converter
|
||||
length = data.Length();
|
||||
EnsureBufferSize(length);
|
||||
PRInt32 bufferLength = mBufferSize;
|
||||
|
||||
mUnicodeEncoder->Reset();
|
||||
nsresult result = mUnicodeEncoder->Convert(data, &length, mBuffer, &bufferLength);
|
||||
mBuffer[bufferLength] = 0;
|
||||
PRInt32 temp = bufferLength;
|
||||
if (NS_SUCCEEDED(result))
|
||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -487,6 +558,7 @@ void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc)
|
|||
* @return
|
||||
*/
|
||||
nsHTMLContentSinkStream::~nsHTMLContentSinkStream()
{
  // The output stream is owned by whoever handed it to us; just drop
  // our pointer to it.
  mOutput = 0;

  // Give up our reference to the unicode encoder, if we hold one.
  NS_IF_RELEASE(mUnicodeEncoder);

  // NOTE(review): mBuffer is not freed in this destructor - confirm that
  // it is released elsewhere.
}
|
||||
|
||||
|
@ -526,8 +598,8 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream
|
|||
key.ToUpperCase();
|
||||
|
||||
|
||||
|
||||
key.ToCString(mBuffer,sizeof(gBuffer)-1);
|
||||
EnsureBufferSize(key.Length());
|
||||
key.ToCString(mBuffer,mBufferSize);
|
||||
|
||||
aStream << " " << mBuffer << char(kEqual);
|
||||
mColPos += 1 + strlen(mBuffer) + 1;
|
||||
|
@ -993,7 +1065,14 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){
|
|||
AddStartTag(aNode,aStream);
|
||||
mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown;
|
||||
}
|
||||
if (type == eHTMLTag_text)
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
const nsString& entity = aNode.GetText();
|
||||
UnicodeToHTMLString(entity);
|
||||
aStream << '&' << mBuffer << ';';
|
||||
mColPos += entity.Length() + 2;
|
||||
}
|
||||
else if (type == eHTMLTag_text)
|
||||
{
|
||||
const nsString& text = aNode.GetText();
|
||||
if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE)
|
||||
|
@ -1136,9 +1215,25 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){
|
|||
*/
|
||||
NS_IMETHODIMP
|
||||
nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
if(mOutput) {
|
||||
AddStartTag(aNode,*mOutput);
|
||||
// eHTMLTags tag = (eHTMLTags)aNode.GetNodeType();
|
||||
if(mOutput)
|
||||
{
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
InitEncoder(value);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
AddStartTag(aNode,*mOutput);
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
|
|
@ -53,6 +53,8 @@
|
|||
class ostream;
|
||||
#endif
|
||||
|
||||
class nsIUnicodeEncoder;
|
||||
|
||||
class nsHTMLContentSinkStream : public nsIHTMLContentSink {
|
||||
public:
|
||||
|
||||
|
@ -135,6 +137,7 @@ protected:
|
|||
void UnicodeToHTMLString(const nsString& aSrc);
|
||||
|
||||
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
|
||||
|
||||
|
@ -153,6 +156,8 @@ protected:
|
|||
|
||||
char* mBuffer;
|
||||
PRInt32 mBufferSize;
|
||||
|
||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
||||
};
|
||||
|
||||
extern NS_HTMLPARS nsresult
|
||||
|
|
|
@ -33,6 +33,12 @@
|
|||
#include "nsString.h"
|
||||
#include "nsIParser.h"
|
||||
#include "nsHTMLEntities.h"
|
||||
#include "nsXIFDTD.h"
|
||||
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsICharsetAlias.h"
|
||||
#include "nsIServiceManager.h"
|
||||
#include "nsICharsetConverterManager.h"
|
||||
|
||||
static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID);
|
||||
static NS_DEFINE_IID(kIContentSinkIID, NS_ICONTENT_SINK_IID);
|
||||
|
@ -44,6 +50,55 @@ static PRBool IsInline(eHTMLTags aTag);
|
|||
static PRBool IsBlockLevel(eHTMLTags aTag);
|
||||
|
||||
|
||||
/**
|
||||
* Inits the encoder instance variable for the sink based on the charset
|
||||
*
|
||||
* @update gpk 4/21/99
|
||||
* @param aCharset
|
||||
* @return NS_xxx error result
|
||||
*/
|
||||
nsresult nsHTMLToTXTSinkStream::InitEncoder(const nsString& aCharset)
|
||||
{
|
||||
|
||||
nsresult res = NS_OK;
|
||||
|
||||
nsICharsetAlias* calias = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetAliasCID,
|
||||
kICharsetAliasIID,
|
||||
(nsISupports**)&calias);
|
||||
|
||||
NS_ASSERTION( nsnull != calias, "cannot find charet alias");
|
||||
nsAutoString charsetName = aCharset;
|
||||
if( NS_SUCCEEDED(res) && (nsnull != calias))
|
||||
{
|
||||
res = calias->GetPreferred(aCharset, charsetName);
|
||||
nsServiceManager::ReleaseService(kCharsetAliasCID, calias);
|
||||
|
||||
if(NS_FAILED(res))
|
||||
{
|
||||
// failed - unknown alias , fallback to ISO-8859-1
|
||||
charsetName = "ISO-8859-1";
|
||||
}
|
||||
|
||||
nsICharsetConverterManager * ccm = nsnull;
|
||||
res = nsServiceManager::GetService(kCharsetConverterManagerCID,
|
||||
kICharsetConverterManagerIID,
|
||||
(nsISupports**)&ccm);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != ccm))
|
||||
{
|
||||
nsIUnicodeEncoder * encoder = nsnull;
|
||||
res = ccm->GetUnicodeEncoder(&charsetName, &encoder);
|
||||
if(NS_SUCCEEDED(res) && (nsnull != encoder))
|
||||
{
|
||||
NS_IF_RELEASE(mUnicodeEncoder);
|
||||
mUnicodeEncoder = encoder;
|
||||
}
|
||||
nsServiceManager::ReleaseService(kCharsetConverterManagerCID, ccm);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -117,6 +172,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() {
|
|||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBuffer = nsnull;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -133,6 +189,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
|
|||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBuffer = nsnull;
|
||||
mUnicodeEncoder = nsnull;
|
||||
}
|
||||
|
||||
|
||||
|
@ -145,6 +202,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) {
|
|||
nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream()
{
  // Give up our reference to the unicode encoder, if we hold one.
  NS_IF_RELEASE(mUnicodeEncoder);

  // The conversion buffer is ours, so free it.
  delete [] mBuffer;

  // The output stream is owned by whoever handed it to us; just drop
  // our pointer to it.
  mOutput = 0;
}
|
||||
|
||||
|
||||
|
@ -409,42 +467,40 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize)
|
|||
}
|
||||
|
||||
|
||||
|
||||
void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc)
|
||||
{
|
||||
|
||||
|
||||
#define CH_NBSP 160
|
||||
#define CH_QUOT 34
|
||||
#define CH_AMP 38
|
||||
#define CH_LT 60
|
||||
#define CH_GT 62
|
||||
|
||||
PRInt32 length = aSrc.Length();
|
||||
PRUnichar ch;
|
||||
const char* entity = nsnull;
|
||||
PRUint32 offset = 0;
|
||||
PRUint32 addedLength = 0;
|
||||
nsresult result;
|
||||
PRInt32 bufferLength;
|
||||
|
||||
if (mUnicodeEncoder == nsnull)
|
||||
InitEncoder("");
|
||||
|
||||
if (length > 0)
|
||||
{
|
||||
EnsureBufferSize(length);
|
||||
for (PRInt32 i = 0; i < length; i++)
|
||||
{
|
||||
ch = aSrc.CharAt(i);
|
||||
switch (ch)
|
||||
{
|
||||
case CH_QUOT: ch = '"'; break;
|
||||
case CH_AMP: ch = '&'; break;
|
||||
case CH_GT: ch = '>'; break;
|
||||
case CH_LT: ch = '<'; break;
|
||||
case CH_NBSP: ch = ' '; break;
|
||||
}
|
||||
bufferLength = mBufferSize;
|
||||
|
||||
mUnicodeEncoder->Reset();
|
||||
result = mUnicodeEncoder->Convert(aSrc, &length, mBuffer, &bufferLength);
|
||||
mBuffer[bufferLength] = 0;
|
||||
PRInt32 temp = bufferLength;
|
||||
if (NS_SUCCEEDED(result))
|
||||
result = mUnicodeEncoder->Finish(mBuffer,&temp);
|
||||
|
||||
if (ch < 128)
|
||||
{
|
||||
mBuffer[offset++] = (unsigned char)ch;
|
||||
mBuffer[offset] = 0;
|
||||
}
|
||||
|
||||
for (PRInt32 i = 0; i < bufferLength; i++)
|
||||
{
|
||||
if (mBuffer[i] == char(CH_NBSP))
|
||||
mBuffer[i] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -483,6 +539,18 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream)
|
|||
mStrBuffer.Append(mBuffer);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
const nsString& text = aNode.GetText();
|
||||
UnicodeToTXTString(text);
|
||||
PRInt32 entity = NS_EntityToUnicode(mBuffer);
|
||||
if (entity < 256)
|
||||
{
|
||||
char ch = (char)entity;
|
||||
aStream << ch;
|
||||
mColPos++;
|
||||
}
|
||||
}
|
||||
else if (type == eHTMLTag_whitespace)
|
||||
{
|
||||
if (PR_TRUE)
|
||||
|
@ -551,6 +619,18 @@ NS_IMETHODIMP
|
|||
nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){
|
||||
eHTMLTags type = (eHTMLTags)aNode.GetNodeType();
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
InitEncoder(value);
|
||||
}
|
||||
}
|
||||
|
||||
if (type == eHTMLTag_body)
|
||||
mDoOutput = PR_TRUE;
|
||||
|
|
|
@ -51,6 +51,9 @@
|
|||
class ostream;
|
||||
#endif
|
||||
|
||||
|
||||
class nsIUnicodeEncoder;
|
||||
|
||||
class nsHTMLToTXTSinkStream : public nsIHTMLContentSink {
|
||||
public:
|
||||
|
||||
|
@ -117,6 +120,7 @@ protected:
|
|||
|
||||
void EnsureBufferSize(PRInt32 aNewSize);
|
||||
void UnicodeToTXTString(const nsString& aSrc);
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
|
||||
protected:
|
||||
|
@ -127,7 +131,8 @@ protected:
|
|||
char* mBuffer;
|
||||
PRInt32 mBufferSize;
|
||||
|
||||
nsString mStrBuffer;
|
||||
nsString mStrBuffer;
|
||||
nsIUnicodeEncoder* mUnicodeEncoder;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -43,6 +43,8 @@ static NS_DEFINE_IID(kClassIID, NS_XIF_DTD_IID);
|
|||
static const char* kNullToken = "Error: Null token given";
|
||||
static const char* kInvalidTagStackPos = "Error: invalid tag stack position";
|
||||
static const char* kXIFDocHeader= "<!DOCTYPE xif>";
|
||||
static const char* kXIFDocInfo= "document_info";
|
||||
static const char* kXIFCharset= "charset";
|
||||
|
||||
|
||||
struct nsXIFTagEntry {
|
||||
|
@ -73,7 +75,10 @@ nsXIFTagEntry gXIFTagTable[] =
|
|||
{"css_stylerule", eXIFTag_css_stylerule},
|
||||
{"css_stylesheet", eXIFTag_css_stylesheet},
|
||||
|
||||
{"document_info", eXIFTag_document_info},
|
||||
|
||||
{"encode", eXIFTag_encode},
|
||||
{"entity", eXIFTag_entity},
|
||||
|
||||
{"import", eXIFTag_import},
|
||||
|
||||
|
@ -343,6 +348,7 @@ nsXIFDTD::nsXIFDTD() : nsIDTD(){
|
|||
mInContent=PR_FALSE;
|
||||
mLowerCaseAttributes=PR_TRUE;
|
||||
mLowerCaseTags=PR_TRUE;
|
||||
mCharset = "";
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -395,15 +401,38 @@ eAutoDetectResult nsXIFDTD::CanParse(nsString& aContentType, nsString& aCommand,
|
|||
if(aContentType.Equals(kXIFTextContentType)){
|
||||
result=ePrimaryDetect;
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
if(kNotFound!=aBuffer.Find(kXIFDocHeader)) {
|
||||
PRInt32 offset = aBuffer.Find("<section>");
|
||||
if (offset != -1)
|
||||
aBuffer.Cut(0,offset);
|
||||
aContentType= kXIFTextContentType;
|
||||
result=ePrimaryDetect;
|
||||
}
|
||||
}
|
||||
|
||||
nsString charset ="ISO-8859-1";
|
||||
PRInt32 offset;
|
||||
offset = aBuffer.Find(kXIFDocInfo);
|
||||
if(kNotFound!=offset)
|
||||
{
|
||||
offset = aBuffer.Find(kXIFCharset);
|
||||
if (kNotFound!=offset)
|
||||
{
|
||||
PRInt32 start = aBuffer.Find('"',offset);
|
||||
PRInt32 end = aBuffer.Find('"',start+1);
|
||||
|
||||
if ((start != kNotFound) && (end != kNotFound))
|
||||
{
|
||||
charset = "";
|
||||
for (PRInt32 i = start+1; i < end; i++)
|
||||
{
|
||||
PRUnichar ch = aBuffer[i];
|
||||
charset.Append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
mCharset = charset;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -638,6 +667,11 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
|
|||
result = OpenContainer(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_entity:
|
||||
StartTopOfStack();
|
||||
ProcessEntityTag(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_content:
|
||||
StartTopOfStack();
|
||||
mInContent = PR_TRUE;
|
||||
|
@ -647,6 +681,10 @@ nsresult nsXIFDTD::HandleStartToken(CToken* aToken) {
|
|||
ProcessEncodeTag(node);
|
||||
break;
|
||||
|
||||
case eXIFTag_document_info:
|
||||
ProcessDocumentInfoTag(node);
|
||||
break;
|
||||
|
||||
|
||||
case eXIFTag_attr:
|
||||
AddAttribute(node);
|
||||
|
@ -1355,8 +1393,8 @@ void nsXIFDTD::BeginStartTag(const nsIParserNode& aNode)
|
|||
if (type == eXIFTag_container)
|
||||
PushHTMLTag(tag,tagName);
|
||||
|
||||
CToken* token = new CStartToken(tagName);
|
||||
nsCParserNode* node = new nsCParserNode(token);
|
||||
// CToken* token = new CStartToken(tagName);
|
||||
// nsCParserNode* node = new nsCParserNode(token);
|
||||
PushNodeAndToken(tagName);
|
||||
break;
|
||||
}
|
||||
|
@ -1629,6 +1667,38 @@ void nsXIFDTD::ProcessEncodeTag(const nsIParserNode& aNode)
|
|||
}
|
||||
|
||||
|
||||
void nsXIFDTD::ProcessEntityTag(const nsIParserNode& aNode)
|
||||
{
|
||||
nsString value;
|
||||
|
||||
if (GetAttribute(aNode,nsString("value"),value))
|
||||
{
|
||||
CEntityToken* entity = new CEntityToken(value);
|
||||
nsCParserNode node((CToken*)entity);
|
||||
mSink->AddLeaf(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nsXIFDTD::ProcessDocumentInfoTag(const nsIParserNode& aNode)
|
||||
{
|
||||
nsString value;
|
||||
nsString key("charset");
|
||||
|
||||
if (GetAttribute(aNode,key,value))
|
||||
{
|
||||
PushNodeAndToken(nsString("XIF_DOC_INFO"));
|
||||
CAttributeToken* attribute = new CAttributeToken(key,value);
|
||||
nsIParserNode* top = PeekNode();
|
||||
if (top != nsnull)
|
||||
((nsCParserNode*)top)->AddAttribute(attribute);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*** CSS Methods ****/
|
||||
|
||||
void nsXIFDTD::BeginCSSStyleSheet(const nsIParserNode& aNode)
|
||||
|
|
|
@ -71,7 +71,9 @@ enum eXIFTags
|
|||
eXIFTag_css_stylesheet,
|
||||
|
||||
eXIFTag_doctype,
|
||||
eXIFTag_encode,
|
||||
eXIFTag_document_info,
|
||||
eXIFTag_encode,
|
||||
eXIFTag_entity,
|
||||
eXIFTag_import,
|
||||
eXIFTag_leaf,
|
||||
eXIFTag_link,
|
||||
|
@ -490,6 +492,8 @@ private:
|
|||
private:
|
||||
|
||||
void ProcessEncodeTag(const nsIParserNode& aNode);
|
||||
void ProcessEntityTag(const nsIParserNode& aNode);
|
||||
void ProcessDocumentInfoTag(const nsIParserNode& aNode);
|
||||
|
||||
void BeginCSSStyleSheet(const nsIParserNode& aNode);
|
||||
void EndCSSStyleSheet(const nsIParserNode& aNode);
|
||||
|
@ -556,6 +560,7 @@ protected:
|
|||
PRBool mLowerCaseTags;
|
||||
PRBool mLowerCaseAttributes;
|
||||
nsITokenizer* mTokenizer;
|
||||
nsString mCharset;
|
||||
};
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче