From f3076566940a76b02527b5308acf08f236f0f599 Mon Sep 17 00:00:00 2001
From: "darin%netscape.com" <darin%netscape.com>
Date: Wed, 18 Jun 2003 23:16:17 +0000
Subject: [PATCH] fixes bug 208845 "multiple content-type headers combined
 breaks mozilla" r=dougt sr=alecf

---
 netwerk/base/src/nsURLHelper.cpp              |  56 ++++++++
 netwerk/base/src/nsURLHelper.h                |  55 +++++++-
 netwerk/protocol/http/src/nsHttp.h            |   3 +
 netwerk/protocol/http/src/nsHttpAuthCache.h   |   4 +-
 netwerk/protocol/http/src/nsHttpHandler.cpp   |   8 +-
 .../protocol/http/src/nsHttpHeaderArray.cpp   |  61 +++++----
 .../protocol/http/src/nsHttpResponseHead.cpp  | 124 +++++++++++-------
 7 files changed, 229 insertions(+), 82 deletions(-)

diff --git a/netwerk/base/src/nsURLHelper.cpp b/netwerk/base/src/nsURLHelper.cpp
index 9a9bcb654d2..f67251758fc 100644
--- a/netwerk/base/src/nsURLHelper.cpp
+++ b/netwerk/base/src/nsURLHelper.cpp
@@ -544,3 +544,59 @@ net_ToLowerCase(char *str)
     for (; *str; ++str)
         ToLower(*str);
 }
+
+char *
+net_FindCharInSet(const char *iter, const char *stop, const char *set)
+{
+    // ends at |stop|, at a null byte, or at the first member of |set|.
+    for (; iter != stop && *iter; ++iter) {
+        const char *s = set;
+        while (*s && *s != *iter) ++s;
+        if (*s) break;
+    }
+    return (char *) iter;
+}
+
+char *
+net_RFindCharInSet(const char *stop, const char *iter, const char *set)
+{
+    // test iter[-1] while stepping down to |stop|; a miss yields |stop - 1|.
+    for (; iter != stop; --iter) {
+        const char *s = set;
+        while (*s && *s != iter[-1])
+            ++s;
+        if (*s)
+            break;
+    }
+    return (char *) iter - 1;
+}
+
+char *
+net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
+{
+    // |iter != stop| is tested first, so an empty range returns |stop|.
+    for (; iter != stop; ++iter) {
+        const char *s = set;
+        while (*s && *s != *iter)
+            ++s;
+        if (!*s)
+            break;
+    }
+    return (char *) iter;
+}
+
+char *
+net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
+{
+    // test |iter != stop| before each dereference: for an empty range the
+    // old code read stop[-1] and could keep walking backwards past |stop|.
+    // running out of characters yields |stop - 1|.
+    for (; iter != stop; --iter) {
+        const char *s = set;
+        while (*s && *s != iter[-1])
+            ++s;
+        if (!*s)
+            break;
+    }
+    return (char *) iter - 1;
+}
diff --git a/netwerk/base/src/nsURLHelper.h b/netwerk/base/src/nsURLHelper.h
index 236939771bb..e366ce0bd2f 100644
--- a/netwerk/base/src/nsURLHelper.h
+++ b/netwerk/base/src/nsURLHelper.h
@@ -109,8 +109,61 @@ inline PRBool net_IsValidScheme(const nsAFlatCString &scheme)
     return net_IsValidScheme(scheme.get(), scheme.Length());
 }
 
-/* convert to lower case (XXX this needs to be factored out) */
+/*****************************************************************************
+ * generic string routines follow (XXX move to someplace more generic).
+ */
+
+/* convert to lower case */
 void net_ToLowerCase(char* str, PRUint32 length);
 void net_ToLowerCase(char* str);
 
+/**
+ * returns pointer to first character of |str| in the given set.  if not found,
+ * then |end| is returned, or the address of the null byte if one is reached
+ * first.  the result is therefore never null.
+ */
+char *net_FindCharInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to first character of |str| NOT in the given set.  if all
+ * characters are in the given set, then |end| is returned.  a null byte is
+ * never treated as a member of |set|, so the scan also stops at a null byte
+ * and returns the address of the null byte.
+ */
+char *net_FindCharNotInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to last character of |str| in the given set, scanning
+ * backwards from |end - 1|.  if not found, then |str - 1| is returned.
+ */
+char *net_RFindCharInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to last character of |str| NOT in the given set, scanning
+ * backwards from |end - 1|.  if all are in the set, |str - 1| is returned.
+ */
+char *net_RFindCharNotInSet(const char *str, const char *end, const char *set);
+
+/* inline versions: |str| is null terminated, so no |end| is passed in */
+
+/* null - 1 (pointer-sized, hence 64-bit safe): a dummy |end| for forward scans */
+#define NET_MAX_ADDRESS (((char*)0)-1)
+
+inline char *net_FindCharInSet(const char *str, const char *set)
+{
+    return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
+}
+inline char *net_FindCharNotInSet(const char *str, const char *set)
+{
+    return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
+}
+inline char *net_RFindCharInSet(const char *str, const char *set)
+{
+    return net_RFindCharInSet(str, str + strlen(str), set);
+}
+inline char *net_RFindCharNotInSet(const char *str, const char *set)
+{
+    return net_RFindCharNotInSet(str, str + strlen(str), set);
+}
+
 #endif // !nsURLHelper_h__
diff --git a/netwerk/protocol/http/src/nsHttp.h b/netwerk/protocol/http/src/nsHttp.h
index 3068f2aff69..a0e5d49fcad 100644
--- a/netwerk/protocol/http/src/nsHttp.h
+++ b/netwerk/protocol/http/src/nsHttp.h
@@ -33,6 +33,7 @@
 #include "prtime.h"
 #include "nsISupportsUtils.h"
 #include "nsPromiseFlatString.h"
+#include "nsURLHelper.h"
 #include "netCore.h"
 
 #if defined(PR_LOGGING)
@@ -162,4 +163,6 @@ PRTimeToSeconds(PRTime t_usec)
 // round q-value to one decimal place; return most significant digit as uint.
 #define QVAL_TO_UINT(q) ((unsigned int) ((q + 0.05) * 10.0))
 
+#define HTTP_LWS " \t"
+
 #endif // nsHttp_h__
diff --git a/netwerk/protocol/http/src/nsHttpAuthCache.h b/netwerk/protocol/http/src/nsHttpAuthCache.h
index bbdfaf48d8e..dd5b8a02571 100644
--- a/netwerk/protocol/http/src/nsHttpAuthCache.h
+++ b/netwerk/protocol/http/src/nsHttpAuthCache.h
@@ -128,9 +128,9 @@ private:
                     const char *challenge,
                     const nsHttpAuthIdentity &ident,
                     nsISupports *metadata)
-        : mRealm(nsnull)
-        , mRoot(nsnull)
+        : mRoot(nsnull)
         , mTail(nsnull)
+        , mRealm(nsnull)
     {
         Set(path, realm, creds, challenge, ident, metadata);
     }
diff --git a/netwerk/protocol/http/src/nsHttpHandler.cpp b/netwerk/protocol/http/src/nsHttpHandler.cpp
index 3101aa57ed5..cb5760be181 100644
--- a/netwerk/protocol/http/src/nsHttpHandler.cpp
+++ b/netwerk/protocol/http/src/nsHttpHandler.cpp
@@ -1145,9 +1145,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua
          token != (char *) 0;
          token = nsCRT::strtok(p, ",", &p))
     {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
         char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
         if (trim != (char*)0)  // remove "; q=..." if present
             *trim = '\0';
 
@@ -1243,9 +1243,9 @@ PrepareAcceptCharsets(const char *i_AcceptCharset, nsACString &o_AcceptCharset)
     for (token = nsCRT::strtok(o_Accept, ",", &p);
          token != (char *) 0;
          token = nsCRT::strtok(p, ",", &p)) {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
         char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
         if (trim != (char*)0)  // remove "; q=..." if present
             *trim = '\0';
 
diff --git a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
index 69cebe3f6ce..8c78c55e3a3 100644
--- a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
+++ b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
@@ -126,48 +126,57 @@ nsHttpHeaderArray::VisitHeaders(nsIHttpHeaderVisitor *visitor)
 void
 nsHttpHeaderArray::ParseHeaderLine(char *line, nsHttpAtom *hdr, char **val)
 {
-    char *p = PL_strchr(line, ':'), *p2;
+    //
+    // Augmented BNF (from section 4.2 of RFC 2616 w/ modifications):
+    //
+    //   message-header = field-name field-sep [ field-value ]
+    //   field-name     = token
+    //   field-sep      = LWS ( ":" | "=" | SP | HT )
+    //   field-value    = *( field-content | LWS )
+    //   field-content  = <the OCTETs making up the field-value
+    //                     and consisting of either *TEXT or combinations
+    //                     of token, separators, and quoted-string>
+    //
+    // Here, we allow a greater set of possible header value separators
+    // for compatibility with the vast number of broken web servers (mostly
+    // lame CGI scripts).  NN4 and IE are similarly tolerant.
+    //
+    //
+    // Examples:
+    //  
+    //   Header: Value
+    //   Header :Value
+    //   Header Value
+    //   Header=Value
+    //
 
-    // the header is malformed... but, there are malformed headers in the
-    // world.  search for ' ' and '\t' to simulate 4.x/IE behavior.
-    if (!p) {
-        p = PL_strchr(line, ' ');
-        if (!p) {
-            p = PL_strchr(line, '\t');
-            if (!p) {
-                // some broken cgi scripts even use '=' as a delimiter!!
-                p = PL_strchr(line, '=');
-            }
-        }
-    }
+    char *p = (char *) strchr(line, ':');
+    if (!p)
+        p = net_FindCharInSet(line, " \t=");
 
     if (p) {
         // ignore whitespace between header name and colon
-        p2 = p;
-        while (--p2 >= line && ((*p2 == ' ') || (*p2 == '\t')))
-            ;
-        *++p2= 0; // overwrite first char after header name
+        char *p2 = net_FindCharInSet(line, p, HTTP_LWS);
+        *p2 = 0; // null terminate header name
 
         nsHttpAtom atom = nsHttp::ResolveAtom(line);
         if (atom) {
             // skip over whitespace
-            do {
-                ++p;
-            } while ((*p == ' ') || (*p == '\t'));
+            p = net_FindCharNotInSet(++p, HTTP_LWS);
 
             // trim trailing whitespace - bug 86608
-            p2 = p + PL_strlen(p);
-            do {
-                --p2;
-            } while (p2 >= p && ((*p2 == ' ') || (*p2 == '\t')));
-            *++p2 = 0;
+            p2 = net_RFindCharNotInSet(p, HTTP_LWS);
+            *++p2 = 0; // null terminate header value; if all chars
+                       // starting at |p| consisted of LWS, then p2
+                       // would have pointed at |p-1|, so the prefix
+                       // increment is always valid.
 
             // assign return values
             if (hdr) *hdr = atom;
             if (val) *val = p;
 
             // assign response header
-            SetHeader(atom, nsDependentCString(p), PR_TRUE);
+            SetHeader(atom, nsDependentCString(p, p2 - p), PR_TRUE);
         }
         else
             LOG(("unknown header; skipping\n"));
diff --git a/netwerk/protocol/http/src/nsHttpResponseHead.cpp b/netwerk/protocol/http/src/nsHttpResponseHead.cpp
index 162fade9df5..b4ea9a9c238 100644
--- a/netwerk/protocol/http/src/nsHttpResponseHead.cpp
+++ b/netwerk/protocol/http/src/nsHttpResponseHead.cpp
@@ -300,8 +300,6 @@ nsHttpResponseHead::ComputeFreshnessLifetime(PRUint32 *result)
 PRBool
 nsHttpResponseHead::MustValidate()
 {
-    const char *val;
-
     LOG(("nsHttpResponseHead::MustValidate ??\n"));
 
     // The no-cache response header indicates that we must validate this
@@ -552,63 +550,91 @@ nsHttpResponseHead::ParseVersion(const char *str)
         mVersion = NS_HTTP_VERSION_1_0;
 }
 
-// This code is duplicated in nsMultiMixedConv.cpp.  If you change it
-// here, change it there, too!
-
 void
 nsHttpResponseHead::ParseContentType(char *type)
 {
     LOG(("nsHttpResponseHead::ParseContentType [type=%s]\n", type));
 
-    // don't bother with an empty content-type header - bug 83465
-    if (!*type)
-        return;
+    //
+    // Augmented BNF (from RFC 2616 section 3.7):
+    //
+    //   header-value = media-type *( LWS "," LWS media-type )
+    //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
+    //   type         = token
+    //   subtype      = token
+    //   parameter    = attribute "=" value
+    //   attribute    = token
+    //   value        = token | quoted-string
+    //
+    //
+    // Examples:
+    //
+    //   text/html
+    //   text/html, text/html
+    //   text/html,text/html; charset=ISO-8859-1
+    //   text/html;charset=ISO-8859-1, text/html
+    //   application/octet-stream
+    //
 
-    // a response could have multiple content type headers... 
-    // we'll honor the last one. But for charset, we will only 
-    // honor the last one that comes with charset. 
-    mContentType.Truncate();
-
-    // we don't care about comments (although they are invalid here)
-    char *p = (char *) strchr(type, '(');
-    if (p)
-        *p = 0;
-
-    // check if the content-type has additional fields...
-    if ((p = (char *) strchr(type, ';')) != nsnull) {
-        char *p2, *p3;
-        // is there a charset field?
-        if ((p2 = PL_strcasestr(p, "charset=")) != nsnull) {
-            p2 += 8;
-
-            // check end of charset parameter
-            if ((p3 = (char *) strchr(p2, ';')) == nsnull)
-                p3 = p2 + strlen(p2);
-
-            // trim any trailing whitespace
-            do {
-                --p3;
-            } while ((*p3 == ' ') || (*p3 == '\t'));
-            *++p3 = 0; // overwrite first char after the charset field
-
-            mContentCharset = p2;
+    // iterate over media-types
+    char *nextType;
+    do {
+        nextType = (char *) strchr(type, ',');
+        if (nextType) {
+            *nextType = '\0';
+            ++nextType;
         }
-    }
-    else
-        p = type + strlen(type);
+        // type points at this media-type; locate first parameter if any
+        const char *charset = "";
+        char *param = (char *) strchr(type, ';');
+        if (param) {
+            *param = '\0';
+            ++param;
 
-    // trim any trailing whitespace
-    while (--p >= type && ((*p == ' ') || (*p == '\t')))
-        ;
-    *++p = 0; // overwrite first char after the media type
+            // iterate over parameters
+            char *nextParam;
+            do {
+                nextParam = (char *) strchr(param, ';');
+                if (nextParam) {
+                    *nextParam = '\0';
+                    ++nextParam;
+                }
+                // param points at this parameter
 
-    // force the content-type to lowercase
-    while (--p >= type)
-        *p = nsCRT::ToLower(*p);
+                param = net_FindCharNotInSet(param, HTTP_LWS);
+                if (PL_strncasecmp(param, "charset=", 8) == 0)
+                    charset = param + 8;
 
-    // If the server sent "*/*", it is meaningless, so do not store it.
-    if (PL_strcmp(type, "*/*"))
-        mContentType = type;
+            } while ((param = nextParam) != nsnull);
+        }
+
+        // trim LWS leading and trailing whitespace from type and charset.
+        // charset cannot have leading whitespace.  we include '(' in the
+        // trailing trim set to catch media-type comments, which are not
+        // at all standard, but may occur in rare cases.
+
+        type = net_FindCharNotInSet(type, HTTP_LWS);
+
+        char *typeEnd    = net_FindCharInSet(type,    HTTP_LWS "(");
+        char *charsetEnd = net_FindCharInSet(charset, HTTP_LWS "(");
+
+        // force content-type to be lowercase
+        net_ToLowerCase(type, typeEnd - type);
+
+        // "*/*" (or an empty type) is meaningless, so do not store it.  if
+        // type equals mContentType, just update the charset -- unless charset
+        // is empty, in which case an existing mContentCharset is preserved.
+        // compare by length: |type| is not null terminated at |typeEnd|.
+        PRUint32 typeLen = PRUint32(typeEnd - type);
+        if (typeLen && !(typeLen == 3 && strncmp(type, "*/*", 3) == 0)) {
+            PRBool eq = mContentType.Equals(Substring(type, typeEnd));
+            if (!eq)
+                mContentType.Assign(type, typeEnd - type);
+            if (!eq || *charset)
+                mContentCharset.Assign(charset, charsetEnd - charset);
+        }
+
+    } while ((type = nextType) != nsnull);
 }
 
 void