From d9ea7f8f8fd13deef2a3b58a304633f7e4656419 Mon Sep 17 00:00:00 2001 From: "bzbarsky%mit.edu" Date: Tue, 27 Aug 2002 23:47:25 +0000 Subject: [PATCH] Bug 119321 and bug 80106. Create a generic nsIUnicharStreamLoader that makes two callbacks -- one to determine the charset and one to provide an nsIUnicharInputStream. Use this in the CSSLoader instead of allocating a single big buffer for the sheet. Clean up the way we look for the @charset charset in the sheet data. r=rpotts for netwerk/, r=peterv for content/, r=smontagu for intl/, sr=darin for the whole thing. --- .../html/content/src/nsGenericHTMLElement.cpp | 9 - content/html/style/public/nsICSSLoader.h | 7 - content/html/style/public/nsICSSParser.h | 11 - content/html/style/src/nsCSSLoader.cpp | 603 ++++++++++-------- content/html/style/src/nsCSSParser.cpp | 22 - intl/uconv/public/nsIConverterInputStream.h | 2 +- intl/uconv/src/nsConverterInputStream.cpp | 94 ++- intl/uconv/src/nsConverterInputStream.h | 16 +- layout/style/nsCSSLoader.cpp | 603 ++++++++++-------- layout/style/nsCSSParser.cpp | 22 - layout/style/nsICSSLoader.h | 7 - layout/style/nsICSSParser.h | 11 - netwerk/base/public/MANIFEST_IDL | 1 + netwerk/base/public/Makefile.in | 1 + .../base/public/nsIUnicharStreamLoader.idl | 110 ++++ netwerk/base/public/nsNetUtil.h | 24 + netwerk/base/src/Makefile.in | 1 + netwerk/base/src/nsUnicharStreamLoader.cpp | 263 ++++++++ netwerk/base/src/nsUnicharStreamLoader.h | 82 +++ netwerk/build/nsNetCID.h | 13 + netwerk/build/nsNetModule.cpp | 5 + netwerk/macbuild/netwerk.xml | 54 ++ netwerk/macbuild/netwerkIDL.xml | 30 + 23 files changed, 1351 insertions(+), 640 deletions(-) create mode 100644 netwerk/base/public/nsIUnicharStreamLoader.idl create mode 100644 netwerk/base/src/nsUnicharStreamLoader.cpp create mode 100644 netwerk/base/src/nsUnicharStreamLoader.h diff --git a/content/html/content/src/nsGenericHTMLElement.cpp b/content/html/content/src/nsGenericHTMLElement.cpp index 995f90c8294f..d9a04b58366b 100644 --- 
a/content/html/content/src/nsGenericHTMLElement.cpp +++ b/content/html/content/src/nsGenericHTMLElement.cpp @@ -3424,15 +3424,6 @@ nsGenericHTMLElement::ParseStyleAttribute(const nsAString& aValue, nsHTMLValue& } if (cssLoader) { result = cssLoader->GetParserFor(nsnull, getter_AddRefs(cssParser)); - - static const char charsetStr[] = "charset="; - PRInt32 charsetOffset = styleType.Find(charsetStr, PR_TRUE); - if (charsetOffset > 0) { - nsString charset; - styleType.Right(charset, styleType.Length() - - (charsetOffset + sizeof(charsetStr) - 1)); - (void)cssLoader->SetCharset(charset); - } } else { result = NS_NewCSSParser(getter_AddRefs(cssParser)); diff --git a/content/html/style/public/nsICSSLoader.h b/content/html/style/public/nsICSSLoader.h index ff0a635d25f5..ed1966d0761b 100644 --- a/content/html/style/public/nsICSSLoader.h +++ b/content/html/style/public/nsICSSLoader.h @@ -125,13 +125,6 @@ public: PRBool& aCompleted, nsICSSLoaderObserver* aObserver) = 0; - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const = 0; - - // SetCharset will ensure that the charset provided is the preferred charset - // if an empty string, then it is set to the default charset - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc) = 0; - // stop loading all sheets NS_IMETHOD Stop(void) = 0; diff --git a/content/html/style/public/nsICSSParser.h b/content/html/style/public/nsICSSParser.h index 2a1071a3b1fb..cd5233da4e15 100644 --- a/content/html/style/public/nsICSSParser.h +++ b/content/html/style/public/nsICSSParser.h @@ -106,17 +106,6 @@ public: nsIURI* aBaseURL, nsCSSDeclaration* aDeclaration, nsChangeHint* aHint) = 0; - - // Charset management method: - // Set the charset before calling any of the Parse emthods if you want the - // charset to be anything other than the default - - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD GetCharset(/*out*/nsAString 
&aCharsetDest) const = 0; - - // SetCharset expects the charset to be the preferred charset - // and it just records the string exactly as passed in (no alias resolution) - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc) = 0; }; // Values or'd in the GetInfoMask; other bits are reserved diff --git a/content/html/style/src/nsCSSLoader.cpp b/content/html/style/src/nsCSSLoader.cpp index e9d4fe741779..5a33d3413726 100644 --- a/content/html/style/src/nsCSSLoader.cpp +++ b/content/html/style/src/nsCSSLoader.cpp @@ -36,7 +36,7 @@ #include "nsIStyleSheetLinkingElement.h" #include "nsIDocument.h" #include "nsINameSpaceManager.h" -#include "nsIStreamLoader.h" +#include "nsIUnicharStreamLoader.h" #include "nsIUnicharInputStream.h" #include "nsIConverterInputStream.h" #include "nsICharsetConverterManager.h" @@ -128,7 +128,7 @@ public: nsSharableCString mSpec; }; -class SheetLoadData : public nsIStreamLoaderObserver +class SheetLoadData : public nsIUnicharStreamLoaderObserver { public: virtual ~SheetLoadData(void); @@ -146,7 +146,7 @@ public: nsICSSLoaderObserver* aObserver); NS_DECL_ISUPPORTS - NS_DECL_NSISTREAMLOADEROBSERVER + NS_DECL_NSIUNICHARSTREAMLOADEROBSERVER CSSLoaderImpl* mLoader; nsIURI* mURL; @@ -175,7 +175,7 @@ public: nsICSSLoaderObserver* mObserver; }; -NS_IMPL_ISUPPORTS1(SheetLoadData, nsIStreamLoaderObserver); +NS_IMPL_ISUPPORTS1(SheetLoadData, nsIUnicharStreamLoaderObserver); MOZ_DECL_CTOR_COUNTER(PendingSheetData) @@ -266,8 +266,8 @@ public: nsresult ParseSheet(nsIUnicharInputStream* aIn, SheetLoadData* aLoadData, PRBool& aCompleted, nsICSSStyleSheet*& aSheet); - void DidLoadStyle(nsIStreamLoader* aLoader, - nsString* aStyleData, // takes ownership, will delete when done + void DidLoadStyle(nsIUnicharStreamLoader* aLoader, + nsIUnicharInputStream* aStyleDataStream, SheetLoadData* aLoadData, nsresult aStatus); @@ -310,24 +310,6 @@ public: nsHashtable mSheetMapTable; // map to insertion index arrays - // @charset support - nsString mCharset; // the 
charset we are using - - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const; // PUBLIC - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc); // PUBLIC - // public method for clients to set the charset if they know it - // NOTE: the SetCharset method will always get the preferred - // charset from the charset passed in unless it is the - // emptystring, which causes the default charset (that of the - // document, falling back to ISO-8869-1) to be set - - nsresult SetCharset(/*in*/ const char* aStyleSheetData, - /*in*/ PRUint32 aDataLength); - // sets the charset based upon the data passed in - // - if the StyleSheetData is not empty and it has '@charset' - // as the first substring, then use that - // - otherwise return an error - // stop loading all sheets NS_IMETHOD Stop(void); @@ -451,7 +433,6 @@ CSSLoaderImpl::CSSLoaderImpl(void) mCaseSensitive = PR_FALSE; mCompatMode = eCompatibility_FullStandards; mParsers = nsnull; - SetCharset(NS_LITERAL_STRING("")); } static PRBool PR_CALLBACK ReleaseSheet(nsHashKey* aKey, void* aData, void* aClosure) @@ -584,7 +565,6 @@ CSSLoaderImpl::GetParserFor(nsICSSStyleSheet* aSheet, if (*aParser) { (*aParser)->SetCaseSensitive(mCaseSensitive); (*aParser)->SetQuirkMode(mCompatMode == eCompatibility_NavQuirks); - (*aParser)->SetCharset(mCharset); if (aSheet) { (*aParser)->SetStyleSheet(aSheet); } @@ -609,6 +589,295 @@ CSSLoaderImpl::RecycleParser(nsICSSParser* aParser) return result; } +// XXX We call this function a good bit. Consider caching the service +// in a static global or something? +static nsresult ResolveCharset(const nsAString& aCharsetAlias, + nsAString& aCharset) +{ + nsresult rv = NS_ERROR_NOT_AVAILABLE; + if (! 
aCharsetAlias.IsEmpty()) { + nsCOMPtr calias(do_GetService(kCharsetAliasCID, &rv)); + NS_ASSERTION(calias, "cannot find charset alias service"); + if (calias) + { + rv = calias->GetPreferred(aCharsetAlias, aCharset); + } + } + return rv; +} + +static const char kCharsetSym[] = "@charset"; + +static nsresult GetCharsetFromData(const unsigned char* aStyleSheetData, + PRUint32 aDataLength, + nsAString& aCharset) +{ + aCharset.Truncate(); + if (aDataLength <= sizeof(kCharsetSym) - 1) + return NS_ERROR_NOT_AVAILABLE; + PRUint32 step = 1; + PRUint32 pos = 0; + // Determine the encoding type + if (*aStyleSheetData == 0x40 && *(aStyleSheetData+1) == 0x63 /* '@c' */ ) { + // 1-byte ASCII-based encoding (ISO-8859-*, UTF-8, etc) + step = 1; + pos = 0; + } + else if (*aStyleSheetData == 0xEF && + *(aStyleSheetData+1) == 0xBB && + *(aStyleSheetData+2) == 0xBF) { + // UTF-8 BOM + step = 1; + pos = 3; + } + else if (*aStyleSheetData == 0xFE && *(aStyleSheetData+1) == 0xFF) { + // big-endian 2-byte encoding BOM + step = 2; + pos = 3; + } + else if (*aStyleSheetData == 0xFF && *(aStyleSheetData+1) == 0xFE) { + // little-endian 2-byte encoding BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 2; + pos = 2; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0xFE && + *(aStyleSheetData+3) == 0xFF) { + // big-endian 4-byte encoding BOM + step = 4; + pos = 7; + } + else if (*aStyleSheetData == 0xFF && + *(aStyleSheetData+1) == 0xFE && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // little-endian 4-byte encoding BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 4; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0xFF && + *(aStyleSheetData+3) == 0xFE) { + // 4-byte encoding BOM in 2143 order + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; 
+ pos = 6; + } + else if (*aStyleSheetData == 0xFE && + *(aStyleSheetData+1) == 0xFF && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding BOM in 3412 order + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 5; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x40) { + // big-endian 4-byte encoding, no BOM + step = 4; + pos = 3; + } + else if (*aStyleSheetData == 0x40 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // little-endian 4-byte encoding, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 0; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x40 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 2143 order, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 2; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 3412 order, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 1; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 3412 order, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 1; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x63) { + // 2-byte big-endian encoding, no BOM + step = 2; + pos = 1; + } + else if (*aStyleSheetData == 0x40 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x63 && + *(aStyleSheetData+3) == 0x00) { + // 
2-byte little-endian encoding, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 2; + pos = 0; + } + else { + // no clue what this is + return NS_ERROR_UNEXPECTED; + } + + PRUint32 index = 0; + while (pos < aDataLength && index < sizeof(kCharsetSym) - 1) { + if (aStyleSheetData[pos] != kCharsetSym[index]) { + return NS_ERROR_NOT_AVAILABLE; + } + ++index; + pos += step; + } + + while (pos < aDataLength && nsCRT::IsAsciiSpace(aStyleSheetData[pos])) { + pos += step; + } + + if (pos >= aDataLength || + (aStyleSheetData[pos] != '"' && aStyleSheetData[pos] != '\'')) { + return NS_ERROR_NOT_AVAILABLE; + } + + char quote = aStyleSheetData[pos]; + pos += step; + while (pos < aDataLength) { + if (aStyleSheetData[pos] == '\\') { + pos += step; + if (pos >= aDataLength) { + break; + } + } else if (aStyleSheetData[pos] == quote) { + break; + } + + aCharset.Append(PRUnichar(aStyleSheetData[pos])); + pos += step; + } + + // Check for the ending ';' + pos += step; + while (pos < aDataLength && nsCRT::IsAsciiSpace(aStyleSheetData[pos])) { + pos += step; + } + + if (pos >= aDataLength || aStyleSheetData[pos] != ';') { + aCharset.Truncate(); + return NS_ERROR_NOT_AVAILABLE; + } + + return NS_OK; +} + +NS_IMETHODIMP +SheetLoadData::OnDetermineCharset(nsIUnicharStreamLoader* aLoader, + nsISupports* aContext, + const char* aData, + PRUint32 aDataLength, + nsACString& aCharset) +{ + nsCOMPtr channel; + nsresult result = aLoader->GetChannel(getter_AddRefs(channel)); + if (NS_FAILED(result)) + channel = nsnull; + + /* + * First determine the charset (if one is indicated) + * 1) Check nsIChannel::contentCharset + * 2) Check @charset rules in the data + * 3) Check "charset" attribute of the <LINK> or <?xml-stylesheet?> + * + * If all these fail to give us a charset, fall back on our + * default (document charset or ISO-8859-1 if we have no document + * charset) + */ + nsAutoString charset; + nsAutoString charsetCandidate; + if (channel) { + nsCAutoString charsetVal; + 
channel->GetContentCharset(charsetVal); + CopyASCIItoUCS2(charsetVal, charsetCandidate); + } + + result = NS_ERROR_NOT_AVAILABLE; + if (! charsetCandidate.IsEmpty()) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from HTTP to: %s\n", NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + + if (NS_FAILED(result)) { + // We have no charset or the HTTP charset is not recognized. + // Try @charset rule + result = GetCharsetFromData((const unsigned char*)aData, + aDataLength, charsetCandidate); + if (NS_SUCCEEDED(result)) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from @charset rule: %s\n", + NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + } + + if (NS_FAILED(result)) { + // Now try the charset on the <link> or processing instruction + // that loaded us + nsCOMPtr + element(do_QueryInterface(mOwningElement)); + if (element) { + element->GetCharset(charsetCandidate); + if (! charsetCandidate.IsEmpty()) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from property on element: %s\n", + NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + } + } + + if (NS_FAILED(result) && mLoader->mDocument) { + // no useful data on charset. Try the document charset. + // That needs no resolution, since it's already fully resolved + mLoader->mDocument->GetDocumentCharacterSet(charset); +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Set from document: %s\n", + NS_ConvertUCS2toUTF8(charset).get()); +#endif + } + + if (charset.IsEmpty()) { + NS_WARNING("Unable to determine charset for sheet, using ISO-8859-1!"); + charset = NS_LITERAL_STRING("ISO-8859-1"); + } + + aCharset = NS_ConvertUCS2toUTF8(charset); + return NS_OK; +} + /** * Report an error to the error console. * @param aErrorName The name of a string in css.properties. 
@@ -659,25 +928,22 @@ ReportToConsole(const PRUnichar* aMessageName, const PRUnichar **aParams, } NS_IMETHODIMP -SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, +SheetLoadData::OnStreamComplete(nsIUnicharStreamLoader* aLoader, nsISupports* aContext, nsresult aStatus, - PRUint32 aStringLen, - const char* aString) + nsIUnicharInputStream* aDataStream) { NS_TIMELINE_OUTDENT(); NS_TIMELINE_MARK_LOADER("SheetLoadData::OnStreamComplete(%s)", aLoader); - nsresult result = NS_OK; - nsString *strUnicodeBuffer = nsnull; - - nsCOMPtr request; - result = aLoader->GetRequest(getter_AddRefs(request)); + nsCOMPtr channel; + nsresult result = aLoader->GetChannel(getter_AddRefs(channel)); if (NS_FAILED(result)) - request = nsnull; + channel = nsnull; + // If it's an HTTP channel, we want to make sure this is not an // error document we got. PRBool realDocument = PR_TRUE; - nsCOMPtr httpChannel(do_QueryInterface(request)); + nsCOMPtr httpChannel(do_QueryInterface(channel)); if (httpChannel) { PRBool requestSucceeded; result = httpChannel->GetRequestSucceeded(&requestSucceeded); @@ -685,10 +951,9 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, realDocument = PR_FALSE; } } - - if (realDocument && aString && aStringLen>0) { + + if (realDocument && aDataStream) { nsCAutoString contentType; - nsCOMPtr channel(do_QueryInterface(request)); if (channel) { channel->GetContentType(contentType); } @@ -713,109 +978,10 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, ReportToConsole(NS_LITERAL_STRING("MimeNotCssWarn").get(), strings, 2, nsIScriptError::warningFlag); } - - /* - * First determine the charset (if one is indicated) - * 1) Check nsIChannel::contentCharset - * 2) Check @charset rules - * 3) Check "charset" attribute of the or - * - * If all these fail to give us a charset, fall back on our - * default (document charset or ISO-8859-1 if we have no document - * charset) - */ - nsAutoString strChannelCharset; - if (channel) { - nsCAutoString charsetVal; 
- channel->GetContentCharset(charsetVal); - CopyASCIItoUCS2(charsetVal, strChannelCharset); - } - result = NS_ERROR_NOT_AVAILABLE; - if (! strChannelCharset.IsEmpty()) { - result = mLoader->SetCharset(strChannelCharset); - } - if (NS_FAILED(result)) { - // We have no charset or the HTTP charset is not recognized. - // Try @charset rules - result = mLoader->SetCharset(aString, aStringLen); - } - if (NS_FAILED(result)) { - // Now try the charset on the or processing instruction - // that loaded us - nsCOMPtr - element(do_QueryInterface(mOwningElement)); - if (element) { - nsAutoString linkCharset; - element->GetCharset(linkCharset); - if (! linkCharset.IsEmpty()) { - result = mLoader->SetCharset(linkCharset); - } - } - } - if (NS_FAILED(result)) { - // no useful data on charset. Just set to empty string, and let - // SetCharset pick a sane default - mLoader->SetCharset(NS_LITERAL_STRING("")); - } - { - // now get the decoder - nsCOMPtr ccm = - do_GetService(kCharsetConverterManagerCID, &result); - if (NS_SUCCEEDED(result) && ccm) { - nsString charset; - mLoader->GetCharset(charset); - nsIUnicodeDecoder *decoder = nsnull; - ccm->GetUnicodeDecoder(&charset,&decoder); - if (decoder) { - PRInt32 unicodeLength=0; - if (NS_SUCCEEDED(decoder->GetMaxLength(aString,aStringLen,&unicodeLength))) { - PRUnichar *unicodeString = nsnull; - strUnicodeBuffer = new nsString; - if (nsnull == strUnicodeBuffer) { - result = NS_ERROR_OUT_OF_MEMORY; - } else { - // make space for the decoding - strUnicodeBuffer->SetCapacity(unicodeLength); - unicodeString = (PRUnichar *) strUnicodeBuffer->get(); - PRInt32 totalChars = 0; - PRInt32 unicharLength = unicodeLength; - do { - PRInt32 srcLength = aStringLen; - result = decoder->Convert(aString, &srcLength, unicodeString, &unicharLength); - - totalChars += unicharLength; - if (NS_FAILED(result)) { - // if we failed, we consume one byte, replace it with U+FFFD - // and try the conversion again. 
- unicodeString[unicharLength++] = (PRUnichar)0xFFFD; - unicodeString = unicodeString + unicharLength; - unicharLength = unicodeLength - (++totalChars); - - decoder->Reset(); - - if (((PRUint32) (srcLength + 1)) > aStringLen) { - srcLength = aStringLen; - } else { - srcLength++; - } - - aString += srcLength; - aStringLen -= srcLength; - } - } while (NS_FAILED(result) && (aStringLen > 0)); - - // Don't propagate return code of unicode decoder - // since it doesn't reflect on our success or failure - // - Ref. bug 87110 - result = NS_OK; - strUnicodeBuffer->SetLength(totalChars); - } - } - NS_RELEASE(decoder); - } - } - } } else { + // Drop the data stream so that we do not load it + aDataStream = nsnull; + nsCAutoString spec; if (channel) { nsCOMPtr uri; @@ -831,17 +997,16 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, ReportToConsole(NS_LITERAL_STRING("MimeNotCss").get(), strings, 2, nsIScriptError::errorFlag); } + } else { + // Drop the data stream so that we do not load it + aDataStream = nsnull; } - - mLoader->DidLoadStyle(aLoader, strUnicodeBuffer, this, aStatus); - // NOTE: passed ownership of strUnicodeBuffer to mLoader in the call, - // so nulling it out for clarity / safety - strUnicodeBuffer = nsnull; - + + mLoader->DidLoadStyle(aLoader, aDataStream, this, aStatus); // We added a reference when the loader was created. This // release should destroy it. NS_RELEASE(aLoader); - return result; + return NS_OK; } static PRBool PR_CALLBACK @@ -1068,8 +1233,8 @@ CSSLoaderImpl::ParseSheet(nsIUnicharInputStream* aIn, } void -CSSLoaderImpl::DidLoadStyle(nsIStreamLoader* aLoader, - nsString* aStyleData, +CSSLoaderImpl::DidLoadStyle(nsIUnicharStreamLoader* aLoader, + nsIUnicharInputStream* aStyleDataStream, SheetLoadData* aLoadData, nsresult aStatus) { @@ -1077,25 +1242,12 @@ CSSLoaderImpl::DidLoadStyle(nsIStreamLoader* aLoader, NS_ASSERTION(! 
mSyncCallback, "getting synchronous callback from netlib"); #endif - if (NS_SUCCEEDED(aStatus) && (aStyleData) && (!aStyleData->IsEmpty()) && (mDocument)) { - nsresult result; - nsIUnicharInputStream* uin = nsnull; - - // wrap the string with the CSS data up in a unicode input stream. - result = NS_NewStringUnicharInputStream(&uin, aStyleData); - - if (NS_SUCCEEDED(result)) { - // XXX We have no way of indicating failure. Silently fail? - PRBool completed; - nsICSSStyleSheet* sheet; - result = ParseSheet(uin, aLoadData, completed, sheet); - NS_IF_RELEASE(sheet); - NS_IF_RELEASE(uin); - } - else { - URLKey key(aLoadData->mURL); - Cleanup(key, aLoadData); - } + if (NS_SUCCEEDED(aStatus) && aStyleDataStream && mDocument) { + // XXX We have no way of indicating failure. Silently fail? + PRBool completed; + nsCOMPtr sheet; + ParseSheet(aStyleDataStream, aLoadData, completed, *getter_AddRefs(sheet)); + // XXX clean up if failure or something? } else { // load failed or document now gone, cleanup #ifdef DEBUG @@ -1399,9 +1551,9 @@ CSSLoaderImpl::LoadSheet(URLKey& aKey, SheetLoadData* aData) } } else if (mDocument || aData->mIsAgent) { // we're still live, start an async load - nsIStreamLoader* loader; - nsIURI* urlClone; - result = aKey.mURL->Clone(&urlClone); // dont give key URL to netlib, it gets munged + nsIUnicharStreamLoader* loader; + nsCOMPtr urlClone; + result = aKey.mURL->Clone(getter_AddRefs(urlClone)); // dont give key URL to netlib, it gets munged if (NS_SUCCEEDED(result)) { #ifdef NS_DEBUG mSyncCallback = PR_TRUE; @@ -1418,14 +1570,27 @@ CSSLoaderImpl::LoadSheet(URLKey& aKey, SheetLoadData* aData) NS_TIMELINE_MARK_URI("Loading style sheet: %s", urlClone); NS_TIMELINE_INDENT(); #endif - result = NS_NewStreamLoader(&loader, urlClone, aData, nsnull, - loadGroup, nsnull, nsIChannel::LOAD_NORMAL, - document_uri, - nsIHttpChannel::REFERRER_INLINES); + nsCOMPtr channel; + result = NS_NewChannel(getter_AddRefs(channel), + urlClone, nsnull, loadGroup, + nsnull, 
nsIChannel::LOAD_NORMAL); + if (NS_SUCCEEDED(result)) { + if (document_uri) { + nsCOMPtr httpChannel(do_QueryInterface(channel)); + if (httpChannel) { + result = httpChannel->SetReferrer(document_uri, + nsIHttpChannel::REFERRER_INLINES); + } + } + + if (NS_SUCCEEDED(result)) { + result = NS_NewUnicharStreamLoader(&loader, channel, aData); + } + } + #ifdef NS_DEBUG mSyncCallback = PR_FALSE; #endif - NS_RELEASE(urlClone); if (NS_SUCCEEDED(result)) { mLoadingSheets.Put(&aKey, aData); // grab any pending alternates that have this URL @@ -1711,7 +1876,8 @@ CSSLoaderImpl::LoadAgentSheet(nsIURI* aURL, do_CreateInstance("@mozilla.org/intl/converter-input-stream;1", &result); if (NS_SUCCEEDED(result)) - result = uin->Init(in, mCharset.get(), 0); + result = uin->Init(in, NS_LITERAL_STRING("ISO-8859-1").get(), + 0, PR_TRUE); if (NS_SUCCEEDED(result)) { SheetLoadData* data = new SheetLoadData(this, aURL, aObserver); if (data == nsnull) { @@ -1792,87 +1958,6 @@ nsresult NS_NewCSSLoader(nsICSSLoader** aLoader) } - -NS_IMETHODIMP CSSLoaderImpl::GetCharset(/*out*/nsAString &aCharsetDest) const -{ - NS_ASSERTION(!mCharset.IsEmpty(), "CSSLoader charset should be set in ctor" ); - nsresult rv = NS_OK; - aCharsetDest = mCharset; - return rv; -} - -NS_IMETHODIMP CSSLoaderImpl::SetCharset(/*in*/ const nsAString &aCharsetSrc) - // public method for clients to set the charset if they know it - // NOTE: the SetCharset method will always get the preferred - // charset from the charset passed in unless it is the - // emptystring, which causes the default charset (that of the - // document, falling back to ISO-8869-1) to be set -{ - nsresult rv = NS_ERROR_NOT_AVAILABLE; - if (! 
aCharsetSrc.IsEmpty()) { - nsCOMPtr calias(do_GetService(kCharsetAliasCID, &rv)); - NS_ASSERTION(calias, "cannot find charset alias"); - if (calias) - { - PRBool same = PR_FALSE; - rv = calias->Equals(aCharsetSrc, mCharset, &same); - if(NS_SUCCEEDED(rv) && same) - { - return NS_OK; // no difference, don't change it - } - rv = calias->GetPreferred(aCharsetSrc, mCharset); - } - } else if (mDocument) { - // GetDocumentCharacterSet returns a charset which already has - // alias resolution done - rv = mDocument->GetDocumentCharacterSet(mCharset); - } - if (mCharset.IsEmpty()) { - mCharset = NS_LITERAL_STRING("ISO-8859-1"); - rv = NS_ERROR_NOT_AVAILABLE; - } - - return rv; -} - -nsresult CSSLoaderImpl::SetCharset(/*in*/ const char* aStyleSheetData, - /*in*/ PRUint32 aDataLength) - // sets the charset based upon the data passed in - // - if the StyleSheetData is not empty and it has '@charset' - // as the first substring, then use that - // - otherwise return an error -{ - nsresult rv = NS_ERROR_NOT_AVAILABLE; - nsString strStyleDataUndecoded; - strStyleDataUndecoded.AssignWithConversion(aStyleSheetData, aDataLength); - PRInt32 charsetOffset; - if (!strStyleDataUndecoded.IsEmpty()) { - nsString str; - static const char atCharsetStr[] = "@charset"; - if ((charsetOffset = strStyleDataUndecoded.Find(atCharsetStr)) > -1) { - nsString strValue; - // skip past the ident - strStyleDataUndecoded.Right(str, strStyleDataUndecoded.Length() - - (sizeof(atCharsetStr)-1)); - // strip any whitespace - str.StripWhitespace(); - // truncate everything past the delimiter (semicolon) - PRInt32 pos = str.Find(";"); - if (pos > -1) { - str.Left(strValue,pos); - } - // strip any quotes - strValue.Trim("\"\'"); - - // that's the charset! 
- if (!strValue.IsEmpty()) { - rv = SetCharset(strValue); - } - } - } - return rv; -} - static PRBool PR_CALLBACK StopLoadingSheetCallback(nsHashKey* aKey, void* aData, void* aClosure) { NS_ENSURE_TRUE(aData, NS_ERROR_NULL_POINTER); diff --git a/content/html/style/src/nsCSSParser.cpp b/content/html/style/src/nsCSSParser.cpp index 8294a6d288b7..4c56864fdd47 100644 --- a/content/html/style/src/nsCSSParser.cpp +++ b/content/html/style/src/nsCSSParser.cpp @@ -181,11 +181,6 @@ public: nsCSSDeclaration* aDeclaration, nsChangeHint* aHint); - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const; - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc); - // NOTE: SetCharset expects the charset to be the preferred charset - // and it just records the string exactly as passed in (no alias resolution) void AppendRule(nsICSSRule* aRule); protected: @@ -353,8 +348,6 @@ protected: nsISupportsArray* mGroupStack; - nsString mCharset; // the charset we are using - PRBool mParsingCompoundProperty; void SetParsingCompoundProperty(PRBool aBool) {mParsingCompoundProperty = aBool;}; PRBool IsParsingCompoundProperty(void) {return mParsingCompoundProperty;}; @@ -438,9 +431,6 @@ CSSParserImpl::CSSParserImpl() mParsingCompoundProperty(PR_FALSE) { NS_INIT_REFCNT(); - - // set the default charset - mCharset.Assign(NS_LITERAL_STRING("ISO-8859-1")); } NS_IMETHODIMP @@ -5353,15 +5343,3 @@ PRBool CSSParserImpl::ParseTextShadow(PRInt32& aErrorCode, } return PR_FALSE; } - -NS_IMETHODIMP CSSParserImpl::GetCharset(/*out*/nsAString &aCharsetDest) const -{ - aCharsetDest = mCharset; - return NS_OK; -} - -NS_IMETHODIMP CSSParserImpl::SetCharset(/*in*/ const nsAString &aCharsetSrc) -{ - mCharset = aCharsetSrc; - return NS_OK; -} diff --git a/intl/uconv/public/nsIConverterInputStream.h b/intl/uconv/public/nsIConverterInputStream.h index 4b67ccd9e342..33564c6209bd 100644 --- a/intl/uconv/public/nsIConverterInputStream.h +++ 
b/intl/uconv/public/nsIConverterInputStream.h @@ -48,6 +48,6 @@ class nsIConverterInputStream : public nsIUnicharInputStream { NS_DEFINE_STATIC_IID_ACCESSOR(NS_ICONVERTERSTREAM_IID) NS_IMETHOD Init(nsIInputStream *aStream, const PRUnichar *aCharset, - PRInt32 aBufferSize) = 0; + PRInt32 aBufferSize, PRBool aRecoverFromErrors) = 0; }; diff --git a/intl/uconv/src/nsConverterInputStream.cpp b/intl/uconv/src/nsConverterInputStream.cpp index 0eafb0ab3907..f93e052be1b9 100644 --- a/intl/uconv/src/nsConverterInputStream.cpp +++ b/intl/uconv/src/nsConverterInputStream.cpp @@ -49,7 +49,8 @@ static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CI NS_IMETHODIMP nsConverterInputStream::Init(nsIInputStream* aStream, const PRUnichar *aCharset, - PRInt32 aBufferSize) + PRInt32 aBufferSize, + PRBool aRecoverFromErrors) { nsresult rv; @@ -77,6 +78,7 @@ nsConverterInputStream::Init(nsIInputStream* aStream, if (NS_FAILED(rv)) return rv; mInput = aStream; + mRecoverFromErrors = aRecoverFromErrors; return NS_OK; } @@ -99,13 +101,12 @@ nsConverterInputStream::Read(PRUnichar* aBuf, { NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); PRUint32 rv = mUnicharDataLength - mUnicharDataOffset; - nsresult errorCode; if (0 == rv) { // Fill the unichar buffer - rv = Fill(&errorCode); - if (rv <= 0) { + rv = Fill(&mLastErrorCode); + if (rv == 0) { *aReadCount = 0; - return errorCode; + return mLastErrorCode; } } if (rv > aCount) { @@ -118,36 +119,77 @@ nsConverterInputStream::Read(PRUnichar* aBuf, return NS_OK; } -PRInt32 +PRUint32 nsConverterInputStream::Fill(nsresult * aErrorCode) { if (nsnull == mInput) { // We already closed the stream! 
*aErrorCode = NS_BASE_STREAM_CLOSED; - return -1; + return 0; } - NS_ASSERTION(mByteData->GetLength() >= mByteDataOffset, "unsigned madness"); - PRUint32 remainder = mByteData->GetLength() - mByteDataOffset; - mByteDataOffset = remainder; - PRInt32 nb = mByteData->Fill(aErrorCode, mInput, remainder); - if (nb <= 0) { - // Because we assume a many to one conversion, the lingering data - // in the byte buffer must be a partial conversion - // fragment. Because we know that we have recieved no more new - // data to add to it, we can't convert it. Therefore, we discard - // it. - return nb; + if (NS_FAILED(mLastErrorCode)) { + // We failed to completely convert last time, and error-recovery + // is disabled. We will fare no better this time, so... + *aErrorCode = mLastErrorCode; + return 0; + } + + // We assume a many to one conversion and are using equal sizes for + // the two buffers. However if an error happens at the very start + // of a byte buffer we may end up in a situation where n bytes lead + // to n+1 unicode chars. Thus we need to keep track of the leftover + // bytes as we convert. 
+ + PRInt32 nb = mByteData->Fill(aErrorCode, mInput, mLeftOverBytes); +#if defined(DEBUG_bzbarsky) && 0 + for (unsigned int foo = 0; foo < mByteData->GetLength(); ++foo) { + fprintf(stderr, "%c", mByteData->GetBuffer()[foo]); + } + fprintf(stderr, "\n"); +#endif + if (nb <= 0 && mLeftOverBytes == 0) { + // No more data + *aErrorCode = NS_OK; + return 0; } - NS_ASSERTION(remainder + nb == mByteData->GetLength(), "bad nb"); + NS_ASSERTION(PRUint32(nb) + mLeftOverBytes == mByteData->GetLength(), + "mByteData is lying to us somewhere"); + // Now convert as much of the byte buffer to unicode as possible - PRInt32 dstLen = mUnicharData->GetBufferSize(); - PRInt32 srcLen = remainder + nb; - *aErrorCode = mConverter->Convert(mByteData->GetBuffer(), &srcLen, - mUnicharData->GetBuffer(), &dstLen); mUnicharDataOffset = 0; - mUnicharDataLength = dstLen; - mByteDataOffset += srcLen; - return dstLen; + mUnicharDataLength = 0; + PRUint32 srcConsumed = 0; + do { + PRInt32 srcLen = mByteData->GetLength() - srcConsumed; + PRInt32 dstLen = mUnicharData->GetBufferSize() - mUnicharDataLength; + *aErrorCode = mConverter->Convert(mByteData->GetBuffer()+srcConsumed, + &srcLen, + mUnicharData->GetBuffer()+mUnicharDataLength, + &dstLen); + mUnicharDataLength += dstLen; + // XXX if srcLen is negative, we want to drop the _first_ byte in + // the erroneous byte sequence and try again. This is not quite + // possible right now -- see bug 160784 + srcConsumed += srcLen; + if (NS_FAILED(*aErrorCode) && mRecoverFromErrors) { + NS_ASSERTION(0 < mUnicharData->GetBufferSize() - mUnicharDataLength, + "Decoder returned an error but filled the output buffer! " + "Should not happen."); + mUnicharData->GetBuffer()[mUnicharDataLength++] = (PRUnichar)0xFFFD; + ++srcConsumed; + // XXX this is needed to make sure we don't underrun our buffer; + // bug 160784 again + srcConsumed = PR_MAX(srcConsumed, 0); + mConverter->Reset(); + } + NS_ASSERTION(srcConsumed <= mByteData->GetLength(), + "Whoa. 
The converter should have returned NS_OK_UDEC_MOREINPUT before this point!"); + } while (mRecoverFromErrors && + NS_FAILED(*aErrorCode)); + + mLeftOverBytes = mByteData->GetLength() - srcConsumed; + + return mUnicharDataLength; } diff --git a/intl/uconv/src/nsConverterInputStream.h b/intl/uconv/src/nsConverterInputStream.h index d6d8296ecf82..2b0356ed578a 100644 --- a/intl/uconv/src/nsConverterInputStream.h +++ b/intl/uconv/src/nsConverterInputStream.h @@ -63,27 +63,31 @@ class nsConverterInputStream : nsIConverterInputStream { PRUint32 *aReadCount); NS_IMETHOD Close(); NS_IMETHOD Init(nsIInputStream* aStream, const PRUnichar *aCharset, - PRInt32 aBufferSize); + PRInt32 aBufferSize, PRBool aRecoverFromErrors); nsConverterInputStream() : - mByteDataOffset(0), + mLastErrorCode(NS_OK), + mLeftOverBytes(0), mUnicharDataOffset(0), - mUnicharDataLength(0) { NS_INIT_REFCNT(); } + mUnicharDataLength(0), + mRecoverFromErrors(PR_FALSE) { NS_INIT_REFCNT(); } virtual ~nsConverterInputStream() {} private: - PRInt32 Fill(nsresult *aErrorCode); + PRUint32 Fill(nsresult *aErrorCode); nsCOMPtr mConverter; nsCOMPtr mByteData; nsCOMPtr mUnicharData; nsCOMPtr mInput; - - PRUint32 mByteDataOffset; + + nsresult mLastErrorCode; + PRUint32 mLeftOverBytes; PRUint32 mUnicharDataOffset; PRUint32 mUnicharDataLength; + PRBool mRecoverFromErrors; }; diff --git a/layout/style/nsCSSLoader.cpp b/layout/style/nsCSSLoader.cpp index e9d4fe741779..5a33d3413726 100644 --- a/layout/style/nsCSSLoader.cpp +++ b/layout/style/nsCSSLoader.cpp @@ -36,7 +36,7 @@ #include "nsIStyleSheetLinkingElement.h" #include "nsIDocument.h" #include "nsINameSpaceManager.h" -#include "nsIStreamLoader.h" +#include "nsIUnicharStreamLoader.h" #include "nsIUnicharInputStream.h" #include "nsIConverterInputStream.h" #include "nsICharsetConverterManager.h" @@ -128,7 +128,7 @@ public: nsSharableCString mSpec; }; -class SheetLoadData : public nsIStreamLoaderObserver +class SheetLoadData : public nsIUnicharStreamLoaderObserver { 
public: virtual ~SheetLoadData(void); @@ -146,7 +146,7 @@ public: nsICSSLoaderObserver* aObserver); NS_DECL_ISUPPORTS - NS_DECL_NSISTREAMLOADEROBSERVER + NS_DECL_NSIUNICHARSTREAMLOADEROBSERVER CSSLoaderImpl* mLoader; nsIURI* mURL; @@ -175,7 +175,7 @@ public: nsICSSLoaderObserver* mObserver; }; -NS_IMPL_ISUPPORTS1(SheetLoadData, nsIStreamLoaderObserver); +NS_IMPL_ISUPPORTS1(SheetLoadData, nsIUnicharStreamLoaderObserver); MOZ_DECL_CTOR_COUNTER(PendingSheetData) @@ -266,8 +266,8 @@ public: nsresult ParseSheet(nsIUnicharInputStream* aIn, SheetLoadData* aLoadData, PRBool& aCompleted, nsICSSStyleSheet*& aSheet); - void DidLoadStyle(nsIStreamLoader* aLoader, - nsString* aStyleData, // takes ownership, will delete when done + void DidLoadStyle(nsIUnicharStreamLoader* aLoader, + nsIUnicharInputStream* aStyleDataStream, SheetLoadData* aLoadData, nsresult aStatus); @@ -310,24 +310,6 @@ public: nsHashtable mSheetMapTable; // map to insertion index arrays - // @charset support - nsString mCharset; // the charset we are using - - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const; // PUBLIC - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc); // PUBLIC - // public method for clients to set the charset if they know it - // NOTE: the SetCharset method will always get the preferred - // charset from the charset passed in unless it is the - // emptystring, which causes the default charset (that of the - // document, falling back to ISO-8869-1) to be set - - nsresult SetCharset(/*in*/ const char* aStyleSheetData, - /*in*/ PRUint32 aDataLength); - // sets the charset based upon the data passed in - // - if the StyleSheetData is not empty and it has '@charset' - // as the first substring, then use that - // - otherwise return an error - // stop loading all sheets NS_IMETHOD Stop(void); @@ -451,7 +433,6 @@ CSSLoaderImpl::CSSLoaderImpl(void) mCaseSensitive = PR_FALSE; mCompatMode = eCompatibility_FullStandards; mParsers = nsnull; - SetCharset(NS_LITERAL_STRING("")); } 
static PRBool PR_CALLBACK ReleaseSheet(nsHashKey* aKey, void* aData, void* aClosure) @@ -584,7 +565,6 @@ CSSLoaderImpl::GetParserFor(nsICSSStyleSheet* aSheet, if (*aParser) { (*aParser)->SetCaseSensitive(mCaseSensitive); (*aParser)->SetQuirkMode(mCompatMode == eCompatibility_NavQuirks); - (*aParser)->SetCharset(mCharset); if (aSheet) { (*aParser)->SetStyleSheet(aSheet); } @@ -609,6 +589,295 @@ CSSLoaderImpl::RecycleParser(nsICSSParser* aParser) return result; } +// XXX We call this function a good bit. Consider caching the service +// in a static global or something? +static nsresult ResolveCharset(const nsAString& aCharsetAlias, + nsAString& aCharset) +{ + nsresult rv = NS_ERROR_NOT_AVAILABLE; + if (! aCharsetAlias.IsEmpty()) { + nsCOMPtr calias(do_GetService(kCharsetAliasCID, &rv)); + NS_ASSERTION(calias, "cannot find charset alias service"); + if (calias) + { + rv = calias->GetPreferred(aCharsetAlias, aCharset); + } + } + return rv; +} + +static const char kCharsetSym[] = "@charset"; + +static nsresult GetCharsetFromData(const unsigned char* aStyleSheetData, + PRUint32 aDataLength, + nsAString& aCharset) +{ + aCharset.Truncate(); + if (aDataLength <= sizeof(kCharsetSym) - 1) + return NS_ERROR_NOT_AVAILABLE; + PRUint32 step = 1; + PRUint32 pos = 0; + // Determine the encoding type + if (*aStyleSheetData == 0x40 && *(aStyleSheetData+1) == 0x63 /* '@c' */ ) { + // 1-byte ASCII-based encoding (ISO-8859-*, UTF-8, etc) + step = 1; + pos = 0; + } + else if (*aStyleSheetData == 0xEF && + *(aStyleSheetData+1) == 0xBB && + *(aStyleSheetData+2) == 0xBF) { + // UTF-8 BOM + step = 1; + pos = 3; + } + else if (*aStyleSheetData == 0xFE && *(aStyleSheetData+1) == 0xFF) { + // big-endian 2-byte encoding BOM + step = 2; + pos = 3; + } + else if (*aStyleSheetData == 0xFF && *(aStyleSheetData+1) == 0xFE) { + // little-endian 2-byte encoding BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 2; + pos = 2; + } + else if (*aStyleSheetData == 0x00 && 
+ *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0xFE && + *(aStyleSheetData+3) == 0xFF) { + // big-endian 4-byte encoding BOM + step = 4; + pos = 7; + } + else if (*aStyleSheetData == 0xFF && + *(aStyleSheetData+1) == 0xFE && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // little-endian 4-byte encoding BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 4; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0xFF && + *(aStyleSheetData+3) == 0xFE) { + // 4-byte encoding BOM in 2143 order + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 6; + } + else if (*aStyleSheetData == 0xFE && + *(aStyleSheetData+1) == 0xFF && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding BOM in 3412 order + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 5; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x40) { + // big-endian 4-byte encoding, no BOM + step = 4; + pos = 3; + } + else if (*aStyleSheetData == 0x40 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // little-encoding 4-byte encoding, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 0; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x40 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 2143 order, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 2; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 3412 order, no BOM + NS_WARNING("Our unicode 
decoders aren't likely to deal with this one"); + step = 4; + pos = 1; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x00) { + // 4-byte encoding in 3412 order, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 4; + pos = 1; + } + else if (*aStyleSheetData == 0x00 && + *(aStyleSheetData+1) == 0x40 && + *(aStyleSheetData+2) == 0x00 && + *(aStyleSheetData+3) == 0x63) { + // 2-byte big-endian encoding, no BOM + step = 2; + pos = 1; + } + else if (*aStyleSheetData == 0x40 && + *(aStyleSheetData+1) == 0x00 && + *(aStyleSheetData+2) == 0x63 && + *(aStyleSheetData+3) == 0x00) { + // 2-byte big-endian encoding, no BOM + NS_WARNING("Our unicode decoders aren't likely to deal with this one"); + step = 2; + pos = 0; + } + else { + // no clue what this is + return NS_ERROR_UNEXPECTED; + } + + PRUint32 index = 0; + while (pos < aDataLength && index < sizeof(kCharsetSym) - 1) { + if (aStyleSheetData[pos] != kCharsetSym[index]) { + return NS_ERROR_NOT_AVAILABLE; + } + ++index; + pos += step; + } + + while (pos < aDataLength && nsCRT::IsAsciiSpace(aStyleSheetData[pos])) { + pos += step; + } + + if (pos >= aDataLength || + (aStyleSheetData[pos] != '"' && aStyleSheetData[pos] != '\'')) { + return NS_ERROR_NOT_AVAILABLE; + } + + char quote = aStyleSheetData[pos]; + pos += step; + while (pos < aDataLength) { + if (aStyleSheetData[pos] == '\\') { + pos += step; + if (pos >= aDataLength) { + break; + } + } else if (aStyleSheetData[pos] == quote) { + break; + } + + aCharset.Append(PRUnichar(aStyleSheetData[pos])); + pos += step; + } + + // Check for the ending ';' + pos += step; + while (pos < aDataLength && nsCRT::IsAsciiSpace(aStyleSheetData[pos])) { + pos += step; + } + + if (pos >= aDataLength || aStyleSheetData[pos] != ';') { + aCharset.Truncate(); + return NS_ERROR_NOT_AVAILABLE; + } + + return NS_OK; +} + +NS_IMETHODIMP 
+SheetLoadData::OnDetermineCharset(nsIUnicharStreamLoader* aLoader, + nsISupports* aContext, + const char* aData, + PRUint32 aDataLength, + nsACString& aCharset) +{ + nsCOMPtr channel; + nsresult result = aLoader->GetChannel(getter_AddRefs(channel)); + if (NS_FAILED(result)) + channel = nsnull; + + /* + * First determine the charset (if one is indicated) + * 1) Check nsIChannel::contentCharset + * 2) Check @charset rules in the data + * 3) Check "charset" attribute of the or + * + * If all these fail to give us a charset, fall back on our + * default (document charset or ISO-8859-1 if we have no document + * charset) + */ + nsAutoString charset; + nsAutoString charsetCandidate; + if (channel) { + nsCAutoString charsetVal; + channel->GetContentCharset(charsetVal); + CopyASCIItoUCS2(charsetVal, charsetCandidate); + } + + result = NS_ERROR_NOT_AVAILABLE; + if (! charsetCandidate.IsEmpty()) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from HTTP to: %s\n", NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + + if (NS_FAILED(result)) { + // We have no charset or the HTTP charset is not recognized. + // Try @charset rule + result = GetCharsetFromData((const unsigned char*)aData, + aDataLength, charsetCandidate); + if (NS_SUCCEEDED(result)) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from @charset rule: %s\n", + NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + } + + if (NS_FAILED(result)) { + // Now try the charset on the or processing instruction + // that loaded us + nsCOMPtr + element(do_QueryInterface(mOwningElement)); + if (element) { + element->GetCharset(charsetCandidate); + if (! 
charsetCandidate.IsEmpty()) { +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Setting from property on element: %s\n", + NS_ConvertUCS2toUTF8(charsetCandidate).get()); +#endif + result = ResolveCharset(charsetCandidate, charset); + } + } + } + + if (NS_FAILED(result) && mLoader->mDocument) { + // no useful data on charset. Try the document charset. + // That needs no resolution, since it's already fully resolved + mLoader->mDocument->GetDocumentCharacterSet(charset); +#ifdef DEBUG_bzbarsky + fprintf(stderr, "Set from document: %s\n", + NS_ConvertUCS2toUTF8(charset).get()); +#endif + } + + if (charset.IsEmpty()) { + NS_WARNING("Unable to determine charset for sheet, using ISO-8859-1!"); + charset = NS_LITERAL_STRING("ISO-8859-1"); + } + + aCharset = NS_ConvertUCS2toUTF8(charset); + return NS_OK; +} + /** * Report an error to the error console. * @param aErrorName The name of a string in css.properties. @@ -659,25 +928,22 @@ ReportToConsole(const PRUnichar* aMessageName, const PRUnichar **aParams, } NS_IMETHODIMP -SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, +SheetLoadData::OnStreamComplete(nsIUnicharStreamLoader* aLoader, nsISupports* aContext, nsresult aStatus, - PRUint32 aStringLen, - const char* aString) + nsIUnicharInputStream* aDataStream) { NS_TIMELINE_OUTDENT(); NS_TIMELINE_MARK_LOADER("SheetLoadData::OnStreamComplete(%s)", aLoader); - nsresult result = NS_OK; - nsString *strUnicodeBuffer = nsnull; - - nsCOMPtr request; - result = aLoader->GetRequest(getter_AddRefs(request)); + nsCOMPtr channel; + nsresult result = aLoader->GetChannel(getter_AddRefs(channel)); if (NS_FAILED(result)) - request = nsnull; + channel = nsnull; + // If it's an HTTP channel, we want to make sure this is not an // error document we got. 
PRBool realDocument = PR_TRUE; - nsCOMPtr httpChannel(do_QueryInterface(request)); + nsCOMPtr httpChannel(do_QueryInterface(channel)); if (httpChannel) { PRBool requestSucceeded; result = httpChannel->GetRequestSucceeded(&requestSucceeded); @@ -685,10 +951,9 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, realDocument = PR_FALSE; } } - - if (realDocument && aString && aStringLen>0) { + + if (realDocument && aDataStream) { nsCAutoString contentType; - nsCOMPtr channel(do_QueryInterface(request)); if (channel) { channel->GetContentType(contentType); } @@ -713,109 +978,10 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, ReportToConsole(NS_LITERAL_STRING("MimeNotCssWarn").get(), strings, 2, nsIScriptError::warningFlag); } - - /* - * First determine the charset (if one is indicated) - * 1) Check nsIChannel::contentCharset - * 2) Check @charset rules - * 3) Check "charset" attribute of the or - * - * If all these fail to give us a charset, fall back on our - * default (document charset or ISO-8859-1 if we have no document - * charset) - */ - nsAutoString strChannelCharset; - if (channel) { - nsCAutoString charsetVal; - channel->GetContentCharset(charsetVal); - CopyASCIItoUCS2(charsetVal, strChannelCharset); - } - result = NS_ERROR_NOT_AVAILABLE; - if (! strChannelCharset.IsEmpty()) { - result = mLoader->SetCharset(strChannelCharset); - } - if (NS_FAILED(result)) { - // We have no charset or the HTTP charset is not recognized. - // Try @charset rules - result = mLoader->SetCharset(aString, aStringLen); - } - if (NS_FAILED(result)) { - // Now try the charset on the or processing instruction - // that loaded us - nsCOMPtr - element(do_QueryInterface(mOwningElement)); - if (element) { - nsAutoString linkCharset; - element->GetCharset(linkCharset); - if (! linkCharset.IsEmpty()) { - result = mLoader->SetCharset(linkCharset); - } - } - } - if (NS_FAILED(result)) { - // no useful data on charset. 
Just set to empty string, and let - // SetCharset pick a sane default - mLoader->SetCharset(NS_LITERAL_STRING("")); - } - { - // now get the decoder - nsCOMPtr ccm = - do_GetService(kCharsetConverterManagerCID, &result); - if (NS_SUCCEEDED(result) && ccm) { - nsString charset; - mLoader->GetCharset(charset); - nsIUnicodeDecoder *decoder = nsnull; - ccm->GetUnicodeDecoder(&charset,&decoder); - if (decoder) { - PRInt32 unicodeLength=0; - if (NS_SUCCEEDED(decoder->GetMaxLength(aString,aStringLen,&unicodeLength))) { - PRUnichar *unicodeString = nsnull; - strUnicodeBuffer = new nsString; - if (nsnull == strUnicodeBuffer) { - result = NS_ERROR_OUT_OF_MEMORY; - } else { - // make space for the decoding - strUnicodeBuffer->SetCapacity(unicodeLength); - unicodeString = (PRUnichar *) strUnicodeBuffer->get(); - PRInt32 totalChars = 0; - PRInt32 unicharLength = unicodeLength; - do { - PRInt32 srcLength = aStringLen; - result = decoder->Convert(aString, &srcLength, unicodeString, &unicharLength); - - totalChars += unicharLength; - if (NS_FAILED(result)) { - // if we failed, we consume one byte, replace it with U+FFFD - // and try the conversion again. - unicodeString[unicharLength++] = (PRUnichar)0xFFFD; - unicodeString = unicodeString + unicharLength; - unicharLength = unicodeLength - (++totalChars); - - decoder->Reset(); - - if (((PRUint32) (srcLength + 1)) > aStringLen) { - srcLength = aStringLen; - } else { - srcLength++; - } - - aString += srcLength; - aStringLen -= srcLength; - } - } while (NS_FAILED(result) && (aStringLen > 0)); - - // Don't propagate return code of unicode decoder - // since it doesn't reflect on our success or failure - // - Ref. 
bug 87110 - result = NS_OK; - strUnicodeBuffer->SetLength(totalChars); - } - } - NS_RELEASE(decoder); - } - } - } } else { + // Drop the data stream so that we do not load it + aDataStream = nsnull; + nsCAutoString spec; if (channel) { nsCOMPtr uri; @@ -831,17 +997,16 @@ SheetLoadData::OnStreamComplete(nsIStreamLoader* aLoader, ReportToConsole(NS_LITERAL_STRING("MimeNotCss").get(), strings, 2, nsIScriptError::errorFlag); } + } else { + // Drop the data stream so that we do not load it + aDataStream = nsnull; } - - mLoader->DidLoadStyle(aLoader, strUnicodeBuffer, this, aStatus); - // NOTE: passed ownership of strUnicodeBuffer to mLoader in the call, - // so nulling it out for clarity / safety - strUnicodeBuffer = nsnull; - + + mLoader->DidLoadStyle(aLoader, aDataStream, this, aStatus); // We added a reference when the loader was created. This // release should destroy it. NS_RELEASE(aLoader); - return result; + return NS_OK; } static PRBool PR_CALLBACK @@ -1068,8 +1233,8 @@ CSSLoaderImpl::ParseSheet(nsIUnicharInputStream* aIn, } void -CSSLoaderImpl::DidLoadStyle(nsIStreamLoader* aLoader, - nsString* aStyleData, +CSSLoaderImpl::DidLoadStyle(nsIUnicharStreamLoader* aLoader, + nsIUnicharInputStream* aStyleDataStream, SheetLoadData* aLoadData, nsresult aStatus) { @@ -1077,25 +1242,12 @@ CSSLoaderImpl::DidLoadStyle(nsIStreamLoader* aLoader, NS_ASSERTION(! mSyncCallback, "getting synchronous callback from netlib"); #endif - if (NS_SUCCEEDED(aStatus) && (aStyleData) && (!aStyleData->IsEmpty()) && (mDocument)) { - nsresult result; - nsIUnicharInputStream* uin = nsnull; - - // wrap the string with the CSS data up in a unicode input stream. - result = NS_NewStringUnicharInputStream(&uin, aStyleData); - - if (NS_SUCCEEDED(result)) { - // XXX We have no way of indicating failure. Silently fail? 
- PRBool completed; - nsICSSStyleSheet* sheet; - result = ParseSheet(uin, aLoadData, completed, sheet); - NS_IF_RELEASE(sheet); - NS_IF_RELEASE(uin); - } - else { - URLKey key(aLoadData->mURL); - Cleanup(key, aLoadData); - } + if (NS_SUCCEEDED(aStatus) && aStyleDataStream && mDocument) { + // XXX We have no way of indicating failure. Silently fail? + PRBool completed; + nsCOMPtr sheet; + ParseSheet(aStyleDataStream, aLoadData, completed, *getter_AddRefs(sheet)); + // XXX clean up if failure or something? } else { // load failed or document now gone, cleanup #ifdef DEBUG @@ -1399,9 +1551,9 @@ CSSLoaderImpl::LoadSheet(URLKey& aKey, SheetLoadData* aData) } } else if (mDocument || aData->mIsAgent) { // we're still live, start an async load - nsIStreamLoader* loader; - nsIURI* urlClone; - result = aKey.mURL->Clone(&urlClone); // dont give key URL to netlib, it gets munged + nsIUnicharStreamLoader* loader; + nsCOMPtr urlClone; + result = aKey.mURL->Clone(getter_AddRefs(urlClone)); // dont give key URL to netlib, it gets munged if (NS_SUCCEEDED(result)) { #ifdef NS_DEBUG mSyncCallback = PR_TRUE; @@ -1418,14 +1570,27 @@ CSSLoaderImpl::LoadSheet(URLKey& aKey, SheetLoadData* aData) NS_TIMELINE_MARK_URI("Loading style sheet: %s", urlClone); NS_TIMELINE_INDENT(); #endif - result = NS_NewStreamLoader(&loader, urlClone, aData, nsnull, - loadGroup, nsnull, nsIChannel::LOAD_NORMAL, - document_uri, - nsIHttpChannel::REFERRER_INLINES); + nsCOMPtr channel; + result = NS_NewChannel(getter_AddRefs(channel), + urlClone, nsnull, loadGroup, + nsnull, nsIChannel::LOAD_NORMAL); + if (NS_SUCCEEDED(result)) { + if (document_uri) { + nsCOMPtr httpChannel(do_QueryInterface(channel)); + if (httpChannel) { + result = httpChannel->SetReferrer(document_uri, + nsIHttpChannel::REFERRER_INLINES); + } + } + + if (NS_SUCCEEDED(result)) { + result = NS_NewUnicharStreamLoader(&loader, channel, aData); + } + } + #ifdef NS_DEBUG mSyncCallback = PR_FALSE; #endif - NS_RELEASE(urlClone); if 
(NS_SUCCEEDED(result)) { mLoadingSheets.Put(&aKey, aData); // grab any pending alternates that have this URL @@ -1711,7 +1876,8 @@ CSSLoaderImpl::LoadAgentSheet(nsIURI* aURL, do_CreateInstance("@mozilla.org/intl/converter-input-stream;1", &result); if (NS_SUCCEEDED(result)) - result = uin->Init(in, mCharset.get(), 0); + result = uin->Init(in, NS_LITERAL_STRING("ISO-8859-1").get(), + 0, PR_TRUE); if (NS_SUCCEEDED(result)) { SheetLoadData* data = new SheetLoadData(this, aURL, aObserver); if (data == nsnull) { @@ -1792,87 +1958,6 @@ nsresult NS_NewCSSLoader(nsICSSLoader** aLoader) } - -NS_IMETHODIMP CSSLoaderImpl::GetCharset(/*out*/nsAString &aCharsetDest) const -{ - NS_ASSERTION(!mCharset.IsEmpty(), "CSSLoader charset should be set in ctor" ); - nsresult rv = NS_OK; - aCharsetDest = mCharset; - return rv; -} - -NS_IMETHODIMP CSSLoaderImpl::SetCharset(/*in*/ const nsAString &aCharsetSrc) - // public method for clients to set the charset if they know it - // NOTE: the SetCharset method will always get the preferred - // charset from the charset passed in unless it is the - // emptystring, which causes the default charset (that of the - // document, falling back to ISO-8869-1) to be set -{ - nsresult rv = NS_ERROR_NOT_AVAILABLE; - if (! 
aCharsetSrc.IsEmpty()) { - nsCOMPtr calias(do_GetService(kCharsetAliasCID, &rv)); - NS_ASSERTION(calias, "cannot find charset alias"); - if (calias) - { - PRBool same = PR_FALSE; - rv = calias->Equals(aCharsetSrc, mCharset, &same); - if(NS_SUCCEEDED(rv) && same) - { - return NS_OK; // no difference, don't change it - } - rv = calias->GetPreferred(aCharsetSrc, mCharset); - } - } else if (mDocument) { - // GetDocumentCharacterSet returns a charset which already has - // alias resolution done - rv = mDocument->GetDocumentCharacterSet(mCharset); - } - if (mCharset.IsEmpty()) { - mCharset = NS_LITERAL_STRING("ISO-8859-1"); - rv = NS_ERROR_NOT_AVAILABLE; - } - - return rv; -} - -nsresult CSSLoaderImpl::SetCharset(/*in*/ const char* aStyleSheetData, - /*in*/ PRUint32 aDataLength) - // sets the charset based upon the data passed in - // - if the StyleSheetData is not empty and it has '@charset' - // as the first substring, then use that - // - otherwise return an error -{ - nsresult rv = NS_ERROR_NOT_AVAILABLE; - nsString strStyleDataUndecoded; - strStyleDataUndecoded.AssignWithConversion(aStyleSheetData, aDataLength); - PRInt32 charsetOffset; - if (!strStyleDataUndecoded.IsEmpty()) { - nsString str; - static const char atCharsetStr[] = "@charset"; - if ((charsetOffset = strStyleDataUndecoded.Find(atCharsetStr)) > -1) { - nsString strValue; - // skip past the ident - strStyleDataUndecoded.Right(str, strStyleDataUndecoded.Length() - - (sizeof(atCharsetStr)-1)); - // strip any whitespace - str.StripWhitespace(); - // truncate everything past the delimiter (semicolon) - PRInt32 pos = str.Find(";"); - if (pos > -1) { - str.Left(strValue,pos); - } - // strip any quotes - strValue.Trim("\"\'"); - - // that's the charset! 
- if (!strValue.IsEmpty()) { - rv = SetCharset(strValue); - } - } - } - return rv; -} - static PRBool PR_CALLBACK StopLoadingSheetCallback(nsHashKey* aKey, void* aData, void* aClosure) { NS_ENSURE_TRUE(aData, NS_ERROR_NULL_POINTER); diff --git a/layout/style/nsCSSParser.cpp b/layout/style/nsCSSParser.cpp index 8294a6d288b7..4c56864fdd47 100644 --- a/layout/style/nsCSSParser.cpp +++ b/layout/style/nsCSSParser.cpp @@ -181,11 +181,6 @@ public: nsCSSDeclaration* aDeclaration, nsChangeHint* aHint); - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const; - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc); - // NOTE: SetCharset expects the charset to be the preferred charset - // and it just records the string exactly as passed in (no alias resolution) void AppendRule(nsICSSRule* aRule); protected: @@ -353,8 +348,6 @@ protected: nsISupportsArray* mGroupStack; - nsString mCharset; // the charset we are using - PRBool mParsingCompoundProperty; void SetParsingCompoundProperty(PRBool aBool) {mParsingCompoundProperty = aBool;}; PRBool IsParsingCompoundProperty(void) {return mParsingCompoundProperty;}; @@ -438,9 +431,6 @@ CSSParserImpl::CSSParserImpl() mParsingCompoundProperty(PR_FALSE) { NS_INIT_REFCNT(); - - // set the default charset - mCharset.Assign(NS_LITERAL_STRING("ISO-8859-1")); } NS_IMETHODIMP @@ -5353,15 +5343,3 @@ PRBool CSSParserImpl::ParseTextShadow(PRInt32& aErrorCode, } return PR_FALSE; } - -NS_IMETHODIMP CSSParserImpl::GetCharset(/*out*/nsAString &aCharsetDest) const -{ - aCharsetDest = mCharset; - return NS_OK; -} - -NS_IMETHODIMP CSSParserImpl::SetCharset(/*in*/ const nsAString &aCharsetSrc) -{ - mCharset = aCharsetSrc; - return NS_OK; -} diff --git a/layout/style/nsICSSLoader.h b/layout/style/nsICSSLoader.h index ff0a635d25f5..ed1966d0761b 100644 --- a/layout/style/nsICSSLoader.h +++ b/layout/style/nsICSSLoader.h @@ -125,13 +125,6 @@ public: PRBool& aCompleted, 
nsICSSLoaderObserver* aObserver) = 0; - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const = 0; - - // SetCharset will ensure that the charset provided is the preferred charset - // if an empty string, then it is set to the default charset - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc) = 0; - // stop loading all sheets NS_IMETHOD Stop(void) = 0; diff --git a/layout/style/nsICSSParser.h b/layout/style/nsICSSParser.h index 2a1071a3b1fb..cd5233da4e15 100644 --- a/layout/style/nsICSSParser.h +++ b/layout/style/nsICSSParser.h @@ -106,17 +106,6 @@ public: nsIURI* aBaseURL, nsCSSDeclaration* aDeclaration, nsChangeHint* aHint) = 0; - - // Charset management method: - // Set the charset before calling any of the Parse emthods if you want the - // charset to be anything other than the default - - // sets the out-param to the current charset, as set by SetCharset - NS_IMETHOD GetCharset(/*out*/nsAString &aCharsetDest) const = 0; - - // SetCharset expects the charset to be the preferred charset - // and it just records the string exactly as passed in (no alias resolution) - NS_IMETHOD SetCharset(/*in*/ const nsAString &aCharsetSrc) = 0; }; // Values or'd in the GetInfoMask; other bits are reserved diff --git a/netwerk/base/public/MANIFEST_IDL b/netwerk/base/public/MANIFEST_IDL index 810065976b22..decb4500be33 100644 --- a/netwerk/base/public/MANIFEST_IDL +++ b/netwerk/base/public/MANIFEST_IDL @@ -30,6 +30,7 @@ nsIStreamListenerTee.idl nsIFileStreams.idl nsITransport.idl nsIStreamLoader.idl +nsIUnicharStreamLoader.idl nsIDownloader.idl nsIResumableChannel.idl nsIResumableEntityID.idl diff --git a/netwerk/base/public/Makefile.in b/netwerk/base/public/Makefile.in index f576e7b14a49..5dc55bff970f 100644 --- a/netwerk/base/public/Makefile.in +++ b/netwerk/base/public/Makefile.in @@ -77,6 +77,7 @@ XPIDLSRCS = \ nsISimpleStreamListener.idl \ nsISimpleStreamProvider.idl \ nsIStreamLoader.idl \ 
+ nsIUnicharStreamLoader.idl \ nsIUploadChannel.idl \ nsIFileURL.idl \ nsIStandardURL.idl \ diff --git a/netwerk/base/public/nsIUnicharStreamLoader.idl b/netwerk/base/public/nsIUnicharStreamLoader.idl new file mode 100644 index 000000000000..9b5046eebcd9 --- /dev/null +++ b/netwerk/base/public/nsIUnicharStreamLoader.idl @@ -0,0 +1,110 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2002 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Darin Fisher (original author) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsISupports.idl" + +interface nsIUnicharInputStream; +interface nsIUnicharStreamLoader; +interface nsIChannel; + +[scriptable, uuid(e06e8b08-8cdd-4503-a0a0-6f3b943602af)] +interface nsIUnicharStreamLoaderObserver : nsISupports +{ + /** + * Called when the first full segment of data if available. + * + * @param aLoader the unichar stream loader + * @param aContext the aContext parameter passed to the loader's init method + * @param aFirstSegment the raw bytes of the first full data segment + * @param aLength the length of aFirstSegment + * + * @return charset corresponding to this stream + */ + ACString onDetermineCharset(in nsIUnicharStreamLoader aLoader, + in nsISupports aContext, + [size_is(aLength)] in string aFirstSegment, + in unsigned long aLength); + + /** + * Called when the entire stream has been loaded. + * + * @param aLoader the unichar stream loader + * @param aContext the aContext parameter passed to the loader's init method + * @param aStatus the status of the underlying channel + * @param aUnicharData the unichar input stream containing the data. This + * can be null in some failure conditions. + */ + void onStreamComplete(in nsIUnicharStreamLoader aLoader, + in nsISupports aContext, + in nsresult aStatus, + in nsIUnicharInputStream aUnicharData); +}; + +[scriptable, uuid(8a3eca16-167e-443d-9485-7e84ed822e95)] +interface nsIUnicharStreamLoader : nsISupports +{ + const unsigned long DEFAULT_SEGMENT_SIZE = 4096; + + /** + * Initializes the unichar stream loader + * + * @param aChannel the channel to read data from. This should _not_ be + * opened; the loader will open the channel itself. 
+ * @param aObserver the observer to notify when a charset is needed and when + * the load is complete + * @param aContext an opaque context pointer + * @param aSegmentSize the size of the segments to use for the data, in bytes + */ + + void init(in nsIChannel aChannel, + in nsIUnicharStreamLoaderObserver aObserver, + in nsISupports aContext, + in unsigned long aSegmentSize); + + /** + * The channel attribute is only valid inside the onDetermineCharset + * and onStreamComplete callbacks. Otherwise it will be null. + */ + readonly attribute nsIChannel channel; + + /** + * The charset that onDetermineCharset returned, if that's been + * called. + */ + readonly attribute ACString charset; +}; diff --git a/netwerk/base/public/nsNetUtil.h b/netwerk/base/public/nsNetUtil.h index 76b7bae4fdc5..6b46267703c5 100644 --- a/netwerk/base/public/nsNetUtil.h +++ b/netwerk/base/public/nsNetUtil.h @@ -64,6 +64,7 @@ #include "nsIDownloader.h" #include "nsIResumableEntityID.h" #include "nsIStreamLoader.h" +#include "nsIUnicharStreamLoader.h" #include "nsIStreamIO.h" #include "nsIPipe.h" #include "nsIProtocolHandler.h" @@ -434,6 +435,29 @@ NS_NewStreamLoader(nsIStreamLoader* *result, return rv; } +inline nsresult +NS_NewUnicharStreamLoader(nsIUnicharStreamLoader **aResult, + nsIChannel *aChannel, + nsIUnicharStreamLoaderObserver *aObserver, + nsISupports *aContext = nsnull, + PRUint32 aSegmentSize = nsIUnicharStreamLoader::DEFAULT_SEGMENT_SIZE) +{ + nsresult rv; + nsCOMPtr loader; + static NS_DEFINE_CID(kUnicharStreamLoaderCID, NS_UNICHARSTREAMLOADER_CID); + rv = nsComponentManager::CreateInstance(kUnicharStreamLoaderCID, + nsnull, + NS_GET_IID(nsIUnicharStreamLoader), + getter_AddRefs(loader)); + if (NS_FAILED(rv)) return rv; + rv = loader->Init(aChannel, aObserver, aContext, aSegmentSize); + + if (NS_FAILED(rv)) return rv; + *aResult = loader; + NS_ADDREF(*aResult); + return rv; +} + inline nsresult NS_NewRequestObserverProxy(nsIRequestObserver **aResult, nsIRequestObserver 
*aObserver, diff --git a/netwerk/base/src/Makefile.in b/netwerk/base/src/Makefile.in index 4f9e6a6370d0..cab69e5a461a 100644 --- a/netwerk/base/src/Makefile.in +++ b/netwerk/base/src/Makefile.in @@ -66,6 +66,7 @@ CPPSRCS = \ nsStreamListenerProxy.cpp \ nsStreamListenerTee.cpp \ nsStreamLoader.cpp \ + nsUnicharStreamLoader.cpp \ nsStreamProviderProxy.cpp \ nsURIChecker.cpp \ nsURLHelper.cpp \ diff --git a/netwerk/base/src/nsUnicharStreamLoader.cpp b/netwerk/base/src/nsUnicharStreamLoader.cpp new file mode 100644 index 000000000000..edb4437bd900 --- /dev/null +++ b/netwerk/base/src/nsUnicharStreamLoader.cpp @@ -0,0 +1,263 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2002 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Boris Zbarsky (original author) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "nsUnicharStreamLoader.h" +#include "nsIPipe.h" +#include "nsIChannel.h" +#include "nsNetUtil.h" +#include "nsProxiedService.h" +#include "nsIChannel.h" +#include "nsIUnicharInputStream.h" +#include "nsIConverterInputStream.h" +#include "nsIPipe.h" + +#ifdef DEBUG // needed for IsASCII assertion +#include "nsReadableUtils.h" +#endif // DEBUG + +static NS_DEFINE_CID(kProxyObjectManagerCID, NS_PROXYEVENT_MANAGER_CID); + +NS_IMETHODIMP +nsUnicharStreamLoader::Init(nsIChannel *aChannel, + nsIUnicharStreamLoaderObserver *aObserver, + nsISupports *aContext, + PRUint32 aSegmentSize) +{ + NS_ENSURE_ARG_POINTER(aChannel); + NS_ENSURE_ARG_POINTER(aObserver); + + if (aSegmentSize <= 0) { + aSegmentSize = nsIUnicharStreamLoader::DEFAULT_SEGMENT_SIZE; + } + + nsresult rv = aChannel->AsyncOpen(this, aContext); + + if (NS_FAILED(rv)) { + // don't callback synchronously as it puts the caller + // in a recursive situation and breaks the asynchronous + // semantics of nsIStreamLoader + nsresult rv2 = NS_OK; + nsCOMPtr pIProxyObjectManager = + do_GetService(kProxyObjectManagerCID, &rv2); + if (NS_FAILED(rv2)) + return rv2; + + nsCOMPtr pObserver; + rv2 = + pIProxyObjectManager->GetProxyForObject(NS_CURRENT_EVENTQ, + NS_GET_IID(nsIUnicharStreamLoaderObserver), + aObserver, + PROXY_ASYNC | PROXY_ALWAYS, + getter_AddRefs(pObserver)); + if (NS_FAILED(rv2)) + return rv2; + + rv = pObserver->OnStreamComplete(this, aContext, rv, nsnull); + } 
+ + mObserver = aObserver; + mContext = aContext; + mCharset.Truncate(); + mChannel = nsnull; // Leave this null till OnStopRequest + mSegmentSize = aSegmentSize; + return rv; +} + +NS_METHOD +nsUnicharStreamLoader::Create(nsISupports *aOuter, + REFNSIID aIID, + void **aResult) +{ + if (aOuter) return NS_ERROR_NO_AGGREGATION; + + nsUnicharStreamLoader* it = new nsUnicharStreamLoader(); + if (it == nsnull) + return NS_ERROR_OUT_OF_MEMORY; + NS_ADDREF(it); + nsresult rv = it->QueryInterface(aIID, aResult); + NS_RELEASE(it); + return rv; +} + +NS_IMPL_ISUPPORTS3(nsUnicharStreamLoader, nsIUnicharStreamLoader, + nsIRequestObserver, nsIStreamListener) + +/* readonly attribute nsIChannel channel; */ +NS_IMETHODIMP +nsUnicharStreamLoader::GetChannel(nsIChannel **aChannel) +{ + NS_IF_ADDREF(*aChannel = mChannel); + return NS_OK; +} + +/* readonly attribute nsACString charset */ +NS_IMETHODIMP +nsUnicharStreamLoader::GetCharset(nsACString& aCharset) +{ + aCharset = mCharset; + return NS_OK; +} + +/* nsIRequestObserver implementation */ +NS_IMETHODIMP +nsUnicharStreamLoader::OnStartRequest(nsIRequest* request, + nsISupports *ctxt) +{ + return NS_OK; +} + +NS_IMETHODIMP +nsUnicharStreamLoader::OnStopRequest(nsIRequest *request, + nsISupports *ctxt, + nsresult aStatus) +{ + nsresult rv = NS_OK; + NS_ASSERTION(mObserver, "No way we can not have an mObserver here!"); + if (mInputStream) { + // We got some data at some point. I guess we should tell our + // observer about it or something.... + + // Make sure mChannel points to the channel that we ended up with + mChannel = do_QueryInterface(request); + + // Determine the charset + PRUint32 readCount = 0; + // XXX Ignore the error return; we have to do it because the pipe is + // broken. See XXX comment in WriteSegmentFun. 
+ mInputStream->ReadSegments(WriteSegmentFun, + this, + mSegmentSize, + &readCount); + + nsCOMPtr uin = + do_CreateInstance("@mozilla.org/intl/converter-input-stream;1", + &rv); + if (NS_FAILED(rv)) { + rv = mObserver->OnStreamComplete(this, mContext, rv, nsnull); + goto cleanup; + } + + rv = uin->Init(mInputStream, + NS_ConvertASCIItoUCS2(mCharset).get(), + mSegmentSize, + PR_TRUE); + + if (NS_FAILED(rv)) { + rv = mObserver->OnStreamComplete(this, mContext, rv, nsnull); + goto cleanup; + } + + mObserver->OnStreamComplete(this, mContext, aStatus, uin); + + } + + // Clean up. + cleanup: + mObserver = nsnull; + mChannel = nsnull; + mContext = nsnull; + mInputStream = nsnull; + mOutputStream = nsnull; + return rv; +} + +/* nsIStreamListener implementation */ +NS_METHOD +nsUnicharStreamLoader::WriteSegmentFun(nsIInputStream *aInputStream, + void *aClosure, + const char *aSegment, + PRUint32 aToOffset, + PRUint32 aCount, + PRUint32 *aWriteCount) +{ + nsUnicharStreamLoader *self = (nsUnicharStreamLoader *) aClosure; + if (self->mCharset.IsEmpty()) { + // First time through. Call our observer. + NS_ASSERTION(self->mObserver, "This should never be possible"); + + nsresult rv = self->mObserver->OnDetermineCharset(self, + self->mContext, + aSegment, + aCount, + self->mCharset); + + if (NS_FAILED(rv) || self->mCharset.IsEmpty()) { + // The observer told us nothing useful + self->mCharset = NS_LITERAL_CSTRING("ISO-8859-1"); + } + + NS_ASSERTION(IsASCII(self->mCharset), + "Why is the charset name non-ascii? Whose bright idea was that?"); + } + // Don't consume any data + *aWriteCount = 0; + // XXX Should return NS_BASE_STREAM_WOULD_BLOCK but the pipe goes into a loop! + return NS_ERROR_FAILURE; +} + + +NS_IMETHODIMP +nsUnicharStreamLoader::OnDataAvailable(nsIRequest *aRequest, + nsISupports *aContext, + nsIInputStream *aInputStream, + PRUint32 aSourceOffset, + PRUint32 aCount) +{ + nsresult rv = NS_OK; + if (!mInputStream) { + // We are not initialized. Time to set things up. 
+ NS_ASSERTION(!mOutputStream, "Why are we sorta-initialized?"); + rv = NS_NewPipe(getter_AddRefs(mInputStream), + getter_AddRefs(mOutputStream), + mSegmentSize, + PRUint32(-1), // give me all the data you can! + PR_TRUE, // non-blocking input + PR_TRUE); // non-blocking output + if (NS_FAILED(rv)) + return rv; + } + + PRUint32 writeCount = 0; + do { + rv = mOutputStream->WriteFrom(aInputStream, aCount, &writeCount); + if (NS_FAILED(rv)) return rv; + aCount -= writeCount; + } while (aCount > 0); + + return NS_OK; +} diff --git a/netwerk/base/src/nsUnicharStreamLoader.h b/netwerk/base/src/nsUnicharStreamLoader.h new file mode 100644 index 000000000000..528c937f8dda --- /dev/null +++ b/netwerk/base/src/nsUnicharStreamLoader.h @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is mozilla.org code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 2002 + * the Initial Developer. All Rights Reserved. 
+ * + * Contributor(s): + * Boris Zbarsky (original author) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef nsUnicharStreamLoader_h__ +#define nsUnicharStreamLoader_h__ + +#include "nsIUnicharStreamLoader.h" +#include "nsIStreamListener.h" +#include "nsCOMPtr.h" +#include "nsIChannel.h" +#include "nsString.h" +#include "nsIInputStream.h" +#include "nsIOutputStream.h" + +class nsUnicharStreamLoader : public nsIUnicharStreamLoader, + public nsIStreamListener +{ +public: + NS_DECL_ISUPPORTS + NS_DECL_NSIUNICHARSTREAMLOADER + NS_DECL_NSIREQUESTOBSERVER + NS_DECL_NSISTREAMLISTENER + + nsUnicharStreamLoader() { NS_INIT_REFCNT(); } + virtual ~nsUnicharStreamLoader() {}; + + static NS_METHOD + Create(nsISupports *aOuter, REFNSIID aIID, void **aResult); + +protected: + /** + * callback method used for ReadSegments + */ + static NS_METHOD WriteSegmentFun(nsIInputStream *, void *, const char *, + PRUint32, PRUint32, PRUint32 *); + + nsCOMPtr mObserver; + nsCOMPtr mContext; // observer's context + nsCString mCharset; + nsCOMPtr mChannel; + nsCOMPtr mInputStream; + nsCOMPtr mOutputStream; + PRUint32 mSegmentSize; + +}; + +#endif // 
nsUnicharStreamLoader_h__ diff --git a/netwerk/build/nsNetCID.h b/netwerk/build/nsNetCID.h index 8df75aab1462..a60de042418a 100644 --- a/netwerk/build/nsNetCID.h +++ b/netwerk/build/nsNetCID.h @@ -274,6 +274,19 @@ { 0xa1, 0xa5, 0x0, 0x50, 0x4, 0x1c, 0xaf, 0x44 } \ } +// component implementing nsIUnicharStreamLoader. +#define NS_UNICHARSTREAMLOADER_CLASSNAME \ + "nsUnicharStreamLoader" +#define NS_UNICHARSTREAMLOADER_CONTRACTID \ + "@mozilla.org/network/unichar-stream-loader;1" +#define NS_UNICHARSTREAMLOADER_CID \ +{ /* 9445791f-fa4c-4669-b174-df5032bb67b3 */ \ + 0x9445791f, \ + 0xfa4c, \ + 0x4669, \ + { 0xb1, 0x74, 0xdf, 0x50, 0x32, 0xbb, 0x67, 0xb3 } \ +} + // component implementing nsIDownloader. #define NS_DOWNLOADER_CLASSNAME \ "nsDownloader" diff --git a/netwerk/build/nsNetModule.cpp b/netwerk/build/nsNetModule.cpp index 9f45579501b8..327352f7fd98 100644 --- a/netwerk/build/nsNetModule.cpp +++ b/netwerk/build/nsNetModule.cpp @@ -52,6 +52,7 @@ #include "nsLoadGroup.h" #include "nsInputStreamChannel.h" #include "nsStreamLoader.h" +#include "nsUnicharStreamLoader.h" #include "nsDownloader.h" #include "nsAsyncStreamListener.h" #include "nsFileStreams.h" @@ -640,6 +641,10 @@ static const nsModuleComponentInfo gNetModuleInfo[] = { NS_STREAMLOADER_CID, NS_STREAMLOADER_CONTRACTID, nsStreamLoader::Create }, + { NS_UNICHARSTREAMLOADER_CLASSNAME, + NS_UNICHARSTREAMLOADER_CID, + NS_UNICHARSTREAMLOADER_CONTRACTID, + nsUnicharStreamLoader::Create }, { NS_DOWNLOADER_CLASSNAME, NS_DOWNLOADER_CID, NS_DOWNLOADER_CONTRACTID, diff --git a/netwerk/macbuild/netwerk.xml b/netwerk/macbuild/netwerk.xml index f2be8f0a45d1..a2d0b37e7380 100644 --- a/netwerk/macbuild/netwerk.xml +++ b/netwerk/macbuild/netwerk.xml @@ -1142,6 +1142,13 @@ Text Debug + + Name + nsUnicharStreamLoader.cpp + MacOS + Text + Debug + Name nsURLHelper.cpp @@ -1731,6 +1738,11 @@ nsStreamLoader.cpp MacOS + + Name + nsUnicharStreamLoader.cpp + MacOS + Name nsURLHelper.cpp @@ -3152,6 +3164,13 @@ Text Debug + + Name + 
nsUnicharStreamLoader.cpp + MacOS + Text + Debug + Name nsURLHelper.cpp @@ -3741,6 +3760,11 @@ nsStreamLoader.cpp MacOS + + Name + nsUnicharStreamLoader.cpp + MacOS + Name nsURLHelper.cpp @@ -5162,6 +5186,13 @@ Text Debug + + Name + nsUnicharStreamLoader.cpp + MacOS + Text + Debug + Name nsURLHelper.cpp @@ -5737,6 +5768,11 @@ nsStreamLoader.cpp MacOS + + Name + nsUnicharStreamLoader.cpp + MacOS + Name nsURLHelper.cpp @@ -7148,6 +7184,13 @@ Text Debug + + Name + nsUnicharStreamLoader.cpp + MacOS + Text + Debug + Name nsURLHelper.cpp @@ -7723,6 +7766,11 @@ nsStreamLoader.cpp MacOS + + Name + nsUnicharStreamLoader.cpp + MacOS + Name nsURLHelper.cpp @@ -8218,6 +8266,12 @@ nsStreamLoader.cpp MacOS + + Necko.shlb + Name + nsUnicharStreamLoader.cpp + MacOS + Necko.shlb Name diff --git a/netwerk/macbuild/netwerkIDL.xml b/netwerk/macbuild/netwerkIDL.xml index ed779e1e6f8e..57f7bc3581b4 100644 --- a/netwerk/macbuild/netwerkIDL.xml +++ b/netwerk/macbuild/netwerkIDL.xml @@ -1015,6 +1015,13 @@ Text + + Name + nsIUnicharStreamLoader.idl + MacOS + Text + + Name nsIURLParser.idl @@ -1625,6 +1632,11 @@ nsIStreamLoader.idl MacOS + + Name + nsIUnicharStreamLoader.idl + MacOS + Name nsIURLParser.idl @@ -2934,6 +2946,13 @@ Text + + Name + nsIUnicharStreamLoader.idl + MacOS + Text + + Name nsIURLParser.idl @@ -3537,6 +3556,11 @@ nsIStreamLoader.idl MacOS + + Name + nsIUnicharStreamLoader.idl + MacOS + Name nsIURLParser.idl @@ -4094,6 +4118,12 @@ nsIStreamLoader.idl MacOS + + headers + Name + nsIUnicharStreamLoader.idl + MacOS + headers Name