fixes bug 208845 "multiple content-type headers combined breaks mozilla" r=dougt sr=alecf

2003-06-18 23:16:17 +00:00 · 2003-06-18 23:16:17 +00:00 · f307656694
--- a/netwerk/base/src/nsURLHelper.cpp
+++ b/netwerk/base/src/nsURLHelper.cpp
@ -544,3 +544,59 @@ net_ToLowerCase(char *str)
    for (; *str; ++str)
        ToLower(*str);
 }
+
+// Scans [iter, stop) for the first character that is a member of |set|.
+// Returns its address, or the address where the scan ended (|stop| or an
+// embedded null byte) when no member is found.
+char *
+net_FindCharInSet(const char *iter, const char *stop, const char *set)
+{
+    while (iter != stop && *iter != '\0') {
+        const char *member = set;
+        while (*member != '\0') {
+            if (*member == *iter)
+                return (char *) iter;
+            ++member;
+        }
+        ++iter;
+    }
+    return (char *) iter;
+}
+
+// Scans [stop, iter) backwards for the last character that is a member of
+// |set|.  |stop| is the first character of the range and |iter| is one past
+// the last.  Returns the match, or |stop - 1| when there is none.
+char *
+net_RFindCharInSet(const char *stop, const char *iter, const char *set)
+{
+    // shift both ends back by one: |cursor| starts on the last character and
+    // |sentinel| sits just before the first one.
+    const char *cursor = iter - 1;
+    const char *const sentinel = stop - 1;
+
+    while (cursor != sentinel) {
+        const char *member = set;
+        while (*member != '\0') {
+            if (*member == *cursor)
+                return (char *) cursor;
+            ++member;
+        }
+        --cursor;
+    }
+    return (char *) cursor;
+}
+
+// Scans [iter, stop) for the first character that is NOT a member of |set|.
+// Returns its address, or |stop| when every character is in |set|.  A null
+// byte is never a member of the null-terminated |set|, so the scan also ends
+// at (and returns the address of) the first null byte.
+char *
+net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
+{
+    // test iter != stop before every dereference so that an empty range
+    // (iter == stop) returns |stop| without reading *stop or running past it.
+    for (; iter != stop; ++iter) {
+        const char *s = set;
+        while (*s && *s != *iter)
+            ++s;
+        if (!*s)
+            break; // *iter matched nothing in |set|
+    }
+    return (char *) iter;
+}
+
+// Scans [stop, iter) backwards for the last character that is NOT a member
+// of |set|.  |stop| is the first character of the range and |iter| is one
+// past the last.  Returns the address of that character, or |stop - 1| when
+// every character is in |set| or the range is empty.
+char *
+net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
+{
+    // step both ends back by one: |iter| becomes the last character and
+    // |stop| the sentinel just before the first character.
+    --iter;
+    --stop;
+
+    // test iter != stop before every dereference; for an empty range the two
+    // are equal right away, so nothing before the range is read and the
+    // result never moves below the sentinel.
+    for (; iter != stop; --iter) {
+        const char *s = set;
+        while (*s && *s != *iter)
+            ++s;
+        if (!*s)
+            break; // *iter matched nothing in |set|
+    }
+    return (char *) iter;
+}
--- a/netwerk/base/src/nsURLHelper.h
+++ b/netwerk/base/src/nsURLHelper.h
@ -109,8 +109,61 @@ inline PRBool net_IsValidScheme(const nsAFlatCString &scheme)
    return net_IsValidScheme(scheme.get(), scheme.Length());
 }

-/* convert to lower case (XXX this needs to be factored out) */
+/*****************************************************************************
+ * generic string routines follow (XXX move to someplace more generic).
+ */
+
+/* convert to lower case */
 void net_ToLowerCase(char* str, PRUint32 length);
 void net_ToLowerCase(char* str);

+/**
+ * returns pointer to first character of |str| in the given set.  if not found,
+ * then |end| is returned.  stops prematurely if a null byte is encountered,
+ * and returns the address of the null byte.
+ */
+char *net_FindCharInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to first character of |str| NOT in the given set.  if all
+ * characters are in the given set, then |end| is returned.  if '\0' is not
+ * included in |set|, then stops prematurely if a null byte is encountered,
+ * and returns the address of the null byte.
+ */
+char *net_FindCharNotInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to last character of |str| in the given set.  if not found,
+ * then |str - 1| is returned.
+ */
+char *net_RFindCharInSet(const char *str, const char *end, const char *set);
+
+/**
+ * returns pointer to last character of |str| NOT in the given set.  if all
+ * characters are in the given set, then |str - 1| is returned.
+ */
+char *net_RFindCharNotInSet(const char *str, const char *end, const char *set);
+
+/* inline versions */
+
+/* "no limit" end pointer; built from a pointer so it stays full-width on 64-bit platforms ;-) */
+#define NET_MAX_ADDRESS (((char*)0)-1)
+
+/* null-terminated variant: no explicit end, the scan stops at the null byte */
+inline char *net_FindCharInSet(const char *str, const char *set)
+{
+    const char *noLimit = NET_MAX_ADDRESS;
+    return net_FindCharInSet(str, noLimit, set);
+}
+/* null-terminated variant: no explicit end, the scan stops at the null byte */
+inline char *net_FindCharNotInSet(const char *str, const char *set)
+{
+    const char *noLimit = NET_MAX_ADDRESS;
+    return net_FindCharNotInSet(str, noLimit, set);
+}
+/* null-terminated variant: the range ends at the string's null byte */
+inline char *net_RFindCharInSet(const char *str, const char *set)
+{
+    const char *end = str + strlen(str);
+    return net_RFindCharInSet(str, end, set);
+}
+/* null-terminated variant: the range ends at the string's null byte */
+inline char *net_RFindCharNotInSet(const char *str, const char *set)
+{
+    const char *end = str + strlen(str);
+    return net_RFindCharNotInSet(str, end, set);
+}
+
 #endif // !nsURLHelper_h__
--- a/netwerk/protocol/http/src/nsHttp.h
+++ b/netwerk/protocol/http/src/nsHttp.h
@ -33,6 +33,7 @@
 #include "prtime.h"
 #include "nsISupportsUtils.h"
 #include "nsPromiseFlatString.h"
+#include "nsURLHelper.h"
 #include "netCore.h"

 #if defined(PR_LOGGING)
@ -162,4 +163,6 @@ PRTimeToSeconds(PRTime t_usec)
 // round q-value to one decimal place; return most significant digit as uint.
 #define QVAL_TO_UINT(q) ((unsigned int) ((q + 0.05) * 10.0))

+#define HTTP_LWS " \t"
+
 #endif // nsHttp_h__
--- a/netwerk/protocol/http/src/nsHttpAuthCache.h
+++ b/netwerk/protocol/http/src/nsHttpAuthCache.h
@ -128,9 +128,9 @@ private:
                    const char *challenge,
                    const nsHttpAuthIdentity &ident,
                    nsISupports *metadata)
-        : mRealm(nsnull)
-        , mRoot(nsnull)
+        : mRoot(nsnull)
        , mTail(nsnull)
+        , mRealm(nsnull)
    {
        Set(path, realm, creds, challenge, ident, metadata);
    }
--- a/netwerk/protocol/http/src/nsHttpHandler.cpp
+++ b/netwerk/protocol/http/src/nsHttpHandler.cpp
@ -1145,9 +1145,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua
         token != (char *) 0;
         token = nsCRT::strtok(p, ",", &p))
    {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
        char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
        if (trim != (char*)0)  // remove "; q=..." if present
            *trim = '\0';

@ -1243,9 +1243,9 @@ PrepareAcceptCharsets(const char *i_AcceptCharset, nsACString &o_AcceptCharset)
    for (token = nsCRT::strtok(o_Accept, ",", &p);
         token != (char *) 0;
         token = nsCRT::strtok(p, ",", &p)) {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
        char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
        if (trim != (char*)0)  // remove "; q=..." if present
            *trim = '\0';

--- a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
+++ b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
@ -126,48 +126,57 @@ nsHttpHeaderArray::VisitHeaders(nsIHttpHeaderVisitor *visitor)
 void
 nsHttpHeaderArray::ParseHeaderLine(char *line, nsHttpAtom *hdr, char **val)
 {
-    char *p = PL_strchr(line, ':'), *p2;
+    //
+    // Augmented BNF (from section 4.2 of RFC 2616 w/ modifications):
+    //
+    //   message-header = field-name field-sep [ field-value ]
+    //   field-name     = token
+    //   field-sep      = LWS ( ":" | "=" | SP | HT )
+    //   field-value    = *( field-content | LWS )
+    //   field-content  = <the OCTETs making up the field-value
+    //                     and consisting of either *TEXT or combinations
+    //                     of token, separators, and quoted-string>
+    //
+    // Here, we allow a greater set of possible header value separators
+    // for compatibility with the vast number of broken web servers (mostly
+    // lame CGI scripts).  NN4 and IE are similarly tolerant.
+    //
+    //
+    // Examples:
+    //  
+    //   Header: Value
+    //   Header :Value
+    //   Header Value
+    //   Header=Value
+    //

-    // the header is malformed... but, there are malformed headers in the
-    // world.  search for ' ' and '\t' to simulate 4.x/IE behavior.
-    if (!p) {
-        p = PL_strchr(line, ' ');
-        if (!p) {
-            p = PL_strchr(line, '\t');
-            if (!p) {
-                // some broken cgi scripts even use '=' as a delimiter!!
-                p = PL_strchr(line, '=');
-            }
-        }
-    }
+    char *p = (char *) strchr(line, ':');
+    if (!p)
+        p = net_FindCharInSet(line, " \t=");

    if (p) {
        // ignore whitespace between header name and colon
-        p2 = p;
-        while (--p2 >= line && ((*p2 == ' ') || (*p2 == '\t')))
-            ;
-        *++p2= 0; // overwrite first char after header name
+        char *p2 = net_FindCharInSet(line, p, HTTP_LWS);
+        *p2 = 0; // null terminate header name

        nsHttpAtom atom = nsHttp::ResolveAtom(line);
        if (atom) {
            // skip over whitespace
-            do {
-                ++p;
-            } while ((*p == ' ') || (*p == '\t'));
+            p = net_FindCharNotInSet(++p, HTTP_LWS);

            // trim trailing whitespace - bug 86608
-            p2 = p + PL_strlen(p);
-            do {
-                --p2;
-            } while (p2 >= p && ((*p2 == ' ') || (*p2 == '\t')));
-            *++p2 = 0;
+            p2 = net_RFindCharNotInSet(p, HTTP_LWS);
+            *++p2 = 0; // null terminate header value; if all chars
+                       // starting at |p| consisted of LWS, then p2
+                       // would have pointed at |p-1|, so the prefix
+                       // increment is always valid.

            // assign return values
            if (hdr) *hdr = atom;
            if (val) *val = p;

            // assign response header
-            SetHeader(atom, nsDependentCString(p), PR_TRUE);
+            SetHeader(atom, nsDependentCString(p, p2 - p), PR_TRUE);
        }
        else
            LOG(("unknown header; skipping\n"));
--- a/netwerk/protocol/http/src/nsHttpResponseHead.cpp
+++ b/netwerk/protocol/http/src/nsHttpResponseHead.cpp
@ -300,8 +300,6 @@ nsHttpResponseHead::ComputeFreshnessLifetime(PRUint32 *result)
 PRBool
 nsHttpResponseHead::MustValidate()
 {
-    const char *val;
-
    LOG(("nsHttpResponseHead::MustValidate ??\n"));

    // The no-cache response header indicates that we must validate this
@ -552,63 +550,91 @@ nsHttpResponseHead::ParseVersion(const char *str)
        mVersion = NS_HTTP_VERSION_1_0;
 }

-// This code is duplicated in nsMultiMixedConv.cpp.  If you change it
-// here, change it there, too!
-
 void
 nsHttpResponseHead::ParseContentType(char *type)
 {
    LOG(("nsHttpResponseHead::ParseContentType [type=%s]\n", type));

-    // don't bother with an empty content-type header - bug 83465
-    if (!*type)
-        return;
+    //
+    // Augmented BNF (from RFC 2616 section 3.7):
+    //
+    //   header-value = media-type *( LWS "," LWS media-type )
+    //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
+    //   type         = token
+    //   subtype      = token
+    //   parameter    = attribute "=" value
+    //   attribute    = token
+    //   value        = token | quoted-string
+    //   
+    //
+    // Examples:
+    //
+    //   text/html
+    //   text/html, text/html
+    //   text/html,text/html; charset=ISO-8859-1
+    //   text/html;charset=ISO-8859-1, text/html
+    //   application/octet-stream
+    //

-    // a response could have multiple content type headers... 
-    // we'll honor the last one. But for charset, we will only 
-    // honor the last one that comes with charset. 
-    mContentType.Truncate();
-
-    // we don't care about comments (although they are invalid here)
-    char *p = (char *) strchr(type, '(');
-    if (p)
-        *p = 0;
-
-    // check if the content-type has additional fields...
-    if ((p = (char *) strchr(type, ';')) != nsnull) {
-        char *p2, *p3;
-        // is there a charset field?
-        if ((p2 = PL_strcasestr(p, "charset=")) != nsnull) {
-            p2 += 8;
-
-            // check end of charset parameter
-            if ((p3 = (char *) strchr(p2, ';')) == nsnull)
-                p3 = p2 + strlen(p2);
-
-            // trim any trailing whitespace
+    // iterate over media-types
+    char *nextType;
    do {
-                --p3;
-            } while ((*p3 == ' ') || (*p3 == '\t'));
-            *++p3 = 0; // overwrite first char after the charset field
-
-            mContentCharset = p2;
+        nextType = (char *) strchr(type, ',');
+        if (nextType) {
+            *nextType = '\0';
+            ++nextType;
        }
+        // type points at this media-type; locate first parameter if any
+        char *charset = "";
+        char *param = (char *) strchr(type, ';');
+        if (param) {
+            *param = '\0';
+            ++param;
+
+            // iterate over parameters
+            char *nextParam;
+            do {
+                nextParam = (char *) strchr(param, ';');
+                if (nextParam) {
+                    *nextParam = '\0';
+                    ++nextParam;
                }
-    else
-        p = type + strlen(type);
+                // param points at this parameter

-    // trim any trailing whitespace
-    while (--p >= type && ((*p == ' ') || (*p == '\t')))
-        ;
-    *++p = 0; // overwrite first char after the media type
+                param = net_FindCharNotInSet(param, HTTP_LWS);
+                if (PL_strncasecmp(param, "charset=", 8) == 0)
+                    charset = param + 8;

-    // force the content-type to lowercase
-    while (--p >= type)
-        *p = nsCRT::ToLower(*p);
+            } while ((param = nextParam) != nsnull);
+        }

-    // If the server sent "*/*", it is meaningless, so do not store it.
-    if (PL_strcmp(type, "*/*"))
-        mContentType = type;
+        // trim leading and trailing LWS from type and charset.
+        // charset cannot have leading whitespace.  we include '(' in the
+        // trailing trim set to catch media-type comments, which are not
+        // at all standard, but may occur in rare cases.
+
+        type = net_FindCharNotInSet(type, HTTP_LWS);
+
+        char *typeEnd    = net_FindCharInSet(type,    HTTP_LWS "(");
+        char *charsetEnd = net_FindCharInSet(charset, HTTP_LWS "(");
+
+        // force content-type to be lowercase
+        net_ToLowerCase(type, typeEnd - type);
+
+        // if the server sent "*/*", it is meaningless, so do not store it.
+        // also, if type is the same as mContentType, then just update the
+        // charset.  however, if charset is empty and mContentType hasn't
+        // changed, then don't wipe-out an existing mContentCharset.
+
+        if (*type && strcmp(type, "*/*") != 0) {
+            PRBool eq = mContentType.Equals(Substring(type, typeEnd));
+            if (!eq)
+                mContentType.Assign(type, typeEnd - type);
+            if (!eq || *charset)
+                mContentCharset.Assign(charset, charsetEnd - charset);
+        }
+
+    } while ((type = nextType) != nsnull);
 }

 void