Optimize nsSegmentEncoder to not get an mEncoder unless it actually has

encoding to do (has a non-ascii spec segment). Bug 212475, r=darin, sr=dbaron
2003-07-12 23:06:13 +00:00 · 2003-07-12 23:06:13 +00:00 · 06dbe6f09a
--- a/netwerk/base/src/nsStandardURL.cpp
+++ b/netwerk/base/src/nsStandardURL.cpp
@ -170,27 +170,8 @@ nsPrefObserver::Observe(nsISupports *subject,

 nsStandardURL::
 nsSegmentEncoder::nsSegmentEncoder(const char *charset)
+    : mCharset(charset) // pointer is kept, not copied; mEncoder is created on demand
 {
-    if (!charset || !*charset)
-        return;
-
-    // get unicode encoder (XXX cache this someplace)
-    nsresult rv;
-    if (!gCharsetMgr) {
-        nsCOMPtr<nsICharsetConverterManager> convMgr(
-                do_GetService("@mozilla.org/charset-converter-manager;1", &rv));
-        if (NS_FAILED(rv)) {
-            NS_ERROR("failed to get charset-converter-manager");
-            return;
-        }
-        NS_ADDREF(gCharsetMgr = convMgr);
-    }
-
-    rv = gCharsetMgr->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
-    if (NS_FAILED(rv)) {
-        NS_ERROR("failed to get unicode encoder");
-        mEncoder = 0; // just in case
-    }
 }

 PRInt32 nsStandardURL::
@ -207,9 +188,13 @@ nsSegmentEncoder::EncodeSegmentCount(const char *str,
        len = seg.mLen;

        // first honor the origin charset if appropriate. as an optimization,
-        // only do this if |str| is non-ASCII.
+        // only do this if the segment is non-ASCII.  Further, if mCharset is
+        // null or the empty string then the origin charset is UTF-8 and there
+        // is nothing to do.
        nsCAutoString encBuf;
-        if (mEncoder && !nsCRT::IsAscii(str)) {
+        if (!nsCRT::IsAscii(str + pos, len) && mCharset && *mCharset) {
+            // we have to encode this segment
+            if (mEncoder || InitUnicodeEncoder()) {
                NS_ConvertUTF8toUCS2 ucsBuf(Substring(str + pos, str + pos + len));
                if (NS_SUCCEEDED(EncodeString(mEncoder, ucsBuf, encBuf))) {
                    str = encBuf.get();
@ -218,6 +203,7 @@ nsSegmentEncoder::EncodeSegmentCount(const char *str,
                }
                // else some failure occured... assume UTF-8 is ok.
            }
+        }

        // escape per RFC2396 unless UTF-8 and allowed by preferences
        PRInt16 escapeFlags = (gEscapeUTF8 || mEncoder) ? 0 : esc_OnlyASCII;
@ -250,6 +236,30 @@ nsSegmentEncoder::EncodeSegment(const nsASingleFragmentCString &str,
        return str;
 }

+PRBool nsStandardURL::
+nsSegmentEncoder::InitUnicodeEncoder() // creates mEncoder for mCharset; PR_TRUE on success
+{
+    NS_ASSERTION(!mEncoder, "Don't call this if we have an encoder already!");
+    nsresult rv;
+    if (!gCharsetMgr) { // fetch the converter manager once; later calls reuse gCharsetMgr
+        rv = CallGetService("@mozilla.org/charset-converter-manager;1",
+                            &gCharsetMgr);
+        if (NS_FAILED(rv)) {
+            NS_ERROR("failed to get charset-converter-manager");
+            return PR_FALSE;
+        }
+    }
+
+    rv = gCharsetMgr->GetUnicodeEncoder(mCharset, getter_AddRefs(mEncoder));
+    if (NS_FAILED(rv)) {
+        NS_ERROR("failed to get unicode encoder");
+        mEncoder = 0; // just in case
+        return PR_FALSE; // mEncoder stays null, so the next non-ASCII segment retries
+    }
+
+    return PR_TRUE;
+}
+
 #define GET_SEGMENT_ENCODER(name) \
    nsSegmentEncoder name(mOriginCharset.get())

--- a/netwerk/base/src/nsStandardURL.h
+++ b/netwerk/base/src/nsStandardURL.h
@ -131,6 +131,10 @@ public: /* internal -- HPUX compiler can't handle this being private */
                                        PRInt16 mask,
                                        nsAFlatCString &buf);
    private:
+        PRBool InitUnicodeEncoder();
+        
+        const char* mCharset;  // Caller should keep this alive for
+                               // the life of the segment encoder
        nsCOMPtr<nsIUnicodeEncoder> mEncoder;
    };
    friend class nsSegmentEncoder;
--- a/xpcom/ds/nsCRT.cpp
+++ b/xpcom/ds/nsCRT.cpp
@ -469,6 +469,26 @@ PRBool nsCRT::IsAscii(const char *aString) {
  }
  return PR_TRUE;
 }
+/**
+ *  Determine whether the first aLength chars of aString are all 7-bit ASCII.
+ *  @param   aString is null terminated
+ *  @param   aLength is the number of chars to test; at most the number of
+ *           chars before the null terminator.  NOTE(review): PRUnichar may
+ *           truncate a long length; confirm, and consider a wider type.
+ *  @return  PR_TRUE if no tested char has the 0x80 bit set, else PR_FALSE
+ */
+PRBool nsCRT::IsAscii(const char* aString, PRUnichar aLength)
+{
+    const char* end = aString + aLength;
+    while (aString < end) {
+        NS_ASSERTION(*aString, "Null byte before end of data!");
+        if (0x80 & *aString)
+            return PR_FALSE;
+        ++aString;
+    }
+    return PR_TRUE;
+}
+
 /**
 *  Determine if given char in valid alpha range
 *  
--- a/xpcom/ds/nsCRT.h
+++ b/xpcom/ds/nsCRT.h
@ -262,6 +262,7 @@ public:
  static PRBool IsAsciiDigit(PRUnichar aChar);
  static PRBool IsAsciiSpace(PRUnichar aChar);
  static PRBool IsAscii(const char* aString);
+  static PRBool IsAscii(const char* aString, PRUnichar aLength);
 };

 #define FF '\014'