diff --git a/netwerk/base/src/nsStandardURL.cpp b/netwerk/base/src/nsStandardURL.cpp index b84532c9d9a..ee88618ae66 100644 --- a/netwerk/base/src/nsStandardURL.cpp +++ b/netwerk/base/src/nsStandardURL.cpp @@ -170,27 +170,8 @@ nsPrefObserver::Observe(nsISupports *subject, nsStandardURL:: nsSegmentEncoder::nsSegmentEncoder(const char *charset) + : mCharset(charset) { - if (!charset || !*charset) - return; - - // get unicode encoder (XXX cache this someplace) - nsresult rv; - if (!gCharsetMgr) { - nsCOMPtr convMgr( - do_GetService("@mozilla.org/charset-converter-manager;1", &rv)); - if (NS_FAILED(rv)) { - NS_ERROR("failed to get charset-converter-manager"); - return; - } - NS_ADDREF(gCharsetMgr = convMgr); - } - - rv = gCharsetMgr->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder)); - if (NS_FAILED(rv)) { - NS_ERROR("failed to get unicode encoder"); - mEncoder = 0; // just in case - } } PRInt32 nsStandardURL:: @@ -207,16 +188,21 @@ nsSegmentEncoder::EncodeSegmentCount(const char *str, len = seg.mLen; // first honor the origin charset if appropriate. as an optimization, - // only do this if |str| is non-ASCII. + // only do this if the segment is non-ASCII. Further, if mCharset is + // null or the empty string then the origin charset is UTF-8 and there + // is nothing to do, so test that (cheap) condition before scanning. nsCAutoString encBuf; - if (mEncoder && !nsCRT::IsAscii(str)) { - NS_ConvertUTF8toUCS2 ucsBuf(Substring(str + pos, str + pos + len)); - if (NS_SUCCEEDED(EncodeString(mEncoder, ucsBuf, encBuf))) { - str = encBuf.get(); - pos = 0; - len = encBuf.Length(); + if (mCharset && *mCharset && !nsCRT::IsAscii(str + pos, len)) { + // we have to encode this segment; the encoder is created on first use + if (mEncoder || InitUnicodeEncoder()) { + NS_ConvertUTF8toUCS2 ucsBuf(Substring(str + pos, str + pos + len)); + if (NS_SUCCEEDED(EncodeString(mEncoder, ucsBuf, encBuf))) { + str = encBuf.get(); + pos = 0; + len = encBuf.Length(); + } + // else some failure occurred... assume UTF-8 is ok. } - // else some failure occured... assume UTF-8 is ok. 
} // escape per RFC2396 unless UTF-8 and allowed by preferences @@ -250,6 +236,30 @@ nsSegmentEncoder::EncodeSegment(const nsASingleFragmentCString &str, return str; } +PRBool nsStandardURL:: +nsSegmentEncoder::InitUnicodeEncoder() /* creates mEncoder for mCharset; returns PR_TRUE on success, PR_FALSE on failure */ +{ + NS_ASSERTION(!mEncoder, "Don't call this if we have an encoder already!"); + nsresult rv; + if (!gCharsetMgr) { /* fetch the converter manager service into gCharsetMgr the first time it is needed */ + rv = CallGetService("@mozilla.org/charset-converter-manager;1", + &gCharsetMgr); + if (NS_FAILED(rv)) { + NS_ERROR("failed to get charset-converter-manager"); + return PR_FALSE; + } + } + + rv = gCharsetMgr->GetUnicodeEncoder(mCharset, getter_AddRefs(mEncoder)); + if (NS_FAILED(rv)) { + NS_ERROR("failed to get unicode encoder"); + mEncoder = 0; // just in case + return PR_FALSE; + } + + return PR_TRUE; +} + #define GET_SEGMENT_ENCODER(name) \ nsSegmentEncoder name(mOriginCharset.get()) diff --git a/netwerk/base/src/nsStandardURL.h b/netwerk/base/src/nsStandardURL.h index 022b5c5f9ed..c86f1db23ce 100644 --- a/netwerk/base/src/nsStandardURL.h +++ b/netwerk/base/src/nsStandardURL.h @@ -131,6 +131,10 @@ public: /* internal -- HPUX compiler can't handle this being private */ PRInt16 mask, nsAFlatCString &buf); private: + PRBool InitUnicodeEncoder(); /* sets mEncoder from mCharset; PR_FALSE on failure */ + + const char* mCharset; // Caller should keep this alive for + // the life of the segment encoder nsCOMPtr mEncoder; }; friend class nsSegmentEncoder; diff --git a/xpcom/ds/nsCRT.cpp b/xpcom/ds/nsCRT.cpp index 0fba583cb11..f3b2eab0f81 100644 --- a/xpcom/ds/nsCRT.cpp +++ b/xpcom/ds/nsCRT.cpp @@ -469,6 +469,26 @@ PRBool nsCRT::IsAscii(const char *aString) { } return PR_TRUE; } +/** + * Determine whether the first aLength chars of the given string are all valid ASCII chars + * + * @param aString is null terminated + * @param aLength is the number of chars to test. 
This must be at most + * the number of chars in aString before the null terminator + * @return PR_TRUE if all chars are valid ASCII chars, PR_FALSE otherwise + */ +PRBool nsCRT::IsAscii(const char* aString, PRUint32 aLength) +{ + const char* end = aString + aLength; /* aLength is 32 bits wide so that lengths above 65535 are not truncated */ + while (aString < end) { + NS_ASSERTION(*aString, "Null byte before end of data!"); + if (0x80 & *aString) /* high bit set means the byte is outside the 7-bit ASCII range */ + return PR_FALSE; + ++aString; + } + return PR_TRUE; +} + /** * Determine if given char in valid alpha range * diff --git a/xpcom/ds/nsCRT.h b/xpcom/ds/nsCRT.h index 36163a854f9..42b2e9ff4ef 100644 --- a/xpcom/ds/nsCRT.h +++ b/xpcom/ds/nsCRT.h @@ -262,6 +262,7 @@ public: static PRBool IsAsciiDigit(PRUnichar aChar); static PRBool IsAsciiSpace(PRUnichar aChar); static PRBool IsAscii(const char* aString); + static PRBool IsAscii(const char* aString, PRUint32 aLength); }; #define FF '\014'