diff --git a/htmlparser/src/nsExpatTokenizer.cpp b/htmlparser/src/nsExpatTokenizer.cpp index 5fad29e7e20..5f7ba55b5aa 100644 --- a/htmlparser/src/nsExpatTokenizer.cpp +++ b/htmlparser/src/nsExpatTokenizer.cpp @@ -27,11 +27,8 @@ #include "nsScanner.h" #include "nsDTDUtils.h" #include "nsParserError.h" -// #include "nsParser.h" #include "nsIParser.h" #include "prlog.h" -#include - /************************************************************************ And now for the main class -- nsExpatTokenizer... @@ -130,15 +127,19 @@ void nsExpatTokenizer::SetupExpatCallbacks(void) { * @param * @return */ -nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() { +nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() { NS_INIT_REFCNT(); mBytesParsed = 0; mSeenError = PR_FALSE; - mExpatParser = XML_ParserCreate(NULL); - gTokenRecycler=(CTokenRecycler*)GetTokenRecycler(); - if (mExpatParser) { - SetupExpatCallbacks(); - } + nsAutoString buffer("UTF-16"); + const PRUnichar* encoding = buffer.GetUnicode(); + if (encoding) { + mExpatParser = XML_ParserCreate((const XML_Char*) encoding); + gTokenRecycler=(CTokenRecycler*)GetTokenRecycler(); + if (mExpatParser) { + SetupExpatCallbacks(); + } + } } /** @@ -231,10 +232,9 @@ void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength) AddToken(theToken, NS_OK, *gTokenDeque,gTokenRecycler); } -nsresult nsExpatTokenizer::ParseXMLBuffer(const char *aBuffer, PRUint32 aLength){ +nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength){ nsresult result=NS_OK; - if (mExpatParser) { - PR_ASSERT(aLength == strlen(aBuffer)); + if (mExpatParser) { if (!mSeenError) { if (!XML_Parse(mExpatParser, aBuffer, aLength, PR_FALSE)) { PushXMLErrorToken(aBuffer, aLength); @@ -272,14 +272,14 @@ nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner) { nsString& theBuffer = aScanner.GetBuffer(); PRInt32 length = theBuffer.Length(); if(0 < length) { - char* expatBuffer = theBuffer.ToNewCString(); - if (expatBuffer) { + const PRUnichar* expatBuffer = theBuffer.GetUnicode(); + PRUint32 bufLength = theBuffer.Length() * 2; + if (expatBuffer) { gTokenDeque=&mTokenDeque; gExpatParser = mExpatParser; - result = ParseXMLBuffer(expatBuffer, length); - delete [] expatBuffer; + result = ParseXMLBuffer((const char *)expatBuffer, bufLength); } - theBuffer.Truncate(0); + theBuffer.Truncate(0); } if(NS_OK==result) result=aScanner.Eof(); diff --git a/htmlparser/src/nsExpatTokenizer.h b/htmlparser/src/nsExpatTokenizer.h index 9c79b0e3d56..520b1cdb391 100644 --- a/htmlparser/src/nsExpatTokenizer.h +++ b/htmlparser/src/nsExpatTokenizer.h @@ -29,6 +29,9 @@ #include "nsISupports.h" #include "nsHTMLTokenizer.h" #include "prtypes.h" + +// Enable unicode characters in expat. +#define XML_UNICODE_WCHAR_T #include "xmlparse.h" #define NS_EXPATTOKENIZER_IID \ diff --git a/parser/htmlparser/src/nsExpatTokenizer.cpp b/parser/htmlparser/src/nsExpatTokenizer.cpp index 5fad29e7e20..5f7ba55b5aa 100644 --- a/parser/htmlparser/src/nsExpatTokenizer.cpp +++ b/parser/htmlparser/src/nsExpatTokenizer.cpp @@ -27,11 +27,8 @@ #include "nsScanner.h" #include "nsDTDUtils.h" #include "nsParserError.h" -// #include "nsParser.h" #include "nsIParser.h" #include "prlog.h" -#include - /************************************************************************ And now for the main class -- nsExpatTokenizer... @@ -130,15 +127,19 @@ void nsExpatTokenizer::SetupExpatCallbacks(void) { * @param * @return */ -nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() { +nsExpatTokenizer::nsExpatTokenizer() : nsHTMLTokenizer() { NS_INIT_REFCNT(); mBytesParsed = 0; mSeenError = PR_FALSE; - mExpatParser = XML_ParserCreate(NULL); - gTokenRecycler=(CTokenRecycler*)GetTokenRecycler(); - if (mExpatParser) { - SetupExpatCallbacks(); - } + nsAutoString buffer("UTF-16"); + const PRUnichar* encoding = buffer.GetUnicode(); + if (encoding) { + mExpatParser = XML_ParserCreate((const XML_Char*) encoding); + gTokenRecycler=(CTokenRecycler*)GetTokenRecycler(); + if (mExpatParser) { + SetupExpatCallbacks(); + } + } } /** @@ -231,10 +232,9 @@ void nsExpatTokenizer::PushXMLErrorToken(const char *aBuffer, PRUint32 aLength) AddToken(theToken, NS_OK, *gTokenDeque,gTokenRecycler); } -nsresult nsExpatTokenizer::ParseXMLBuffer(const char *aBuffer, PRUint32 aLength){ +nsresult nsExpatTokenizer::ParseXMLBuffer(const char* aBuffer, PRUint32 aLength){ nsresult result=NS_OK; - if (mExpatParser) { - PR_ASSERT(aLength == strlen(aBuffer)); + if (mExpatParser) { if (!mSeenError) { if (!XML_Parse(mExpatParser, aBuffer, aLength, PR_FALSE)) { PushXMLErrorToken(aBuffer, aLength); @@ -272,14 +272,14 @@ nsresult nsExpatTokenizer::ConsumeToken(nsScanner& aScanner) { nsString& theBuffer = aScanner.GetBuffer(); PRInt32 length = theBuffer.Length(); if(0 < length) { - char* expatBuffer = theBuffer.ToNewCString(); - if (expatBuffer) { + const PRUnichar* expatBuffer = theBuffer.GetUnicode(); + PRUint32 bufLength = theBuffer.Length() * 2; + if (expatBuffer) { gTokenDeque=&mTokenDeque; gExpatParser = mExpatParser; - result = ParseXMLBuffer(expatBuffer, length); - delete [] expatBuffer; + result = ParseXMLBuffer((const char *)expatBuffer, bufLength); } - theBuffer.Truncate(0); + theBuffer.Truncate(0); } if(NS_OK==result) result=aScanner.Eof(); diff --git a/parser/htmlparser/src/nsExpatTokenizer.h b/parser/htmlparser/src/nsExpatTokenizer.h index 9c79b0e3d56..520b1cdb391 100644 --- a/parser/htmlparser/src/nsExpatTokenizer.h +++ b/parser/htmlparser/src/nsExpatTokenizer.h @@ -29,6 +29,9 @@ #include "nsISupports.h" #include "nsHTMLTokenizer.h" #include "prtypes.h" + +// Enable unicode characters in expat. +#define XML_UNICODE_WCHAR_T #include "xmlparse.h" #define NS_EXPATTOKENIZER_IID \