fixes bug 208845 "multiple content-type headers combined breaks mozilla" r=dougt sr=alecf

2003-06-18 23:16:17 +00:00 · 2003-06-18 23:16:17 +00:00 · 7155f9d29a
--- a/netwerk/base/src/nsURLHelper.cpp
+++ b/netwerk/base/src/nsURLHelper.cpp
@ -544,3 +544,59 @@ net_ToLowerCase(char *str)
    for (; *str; ++str)
        ToLower(*str);
 }
 // Scans forward from |iter| looking for the first character that also
 // appears in |set|.  Scanning ends at |stop| or at a null byte in the input,
 // whichever is reached first; the position where scanning ended is returned
 // in that case.
 char *
 net_FindCharInSet(const char *iter, const char *stop, const char *set)
 {
    while (iter != stop && *iter != '\0') {
        // walk |set| until we either match the current character or run out
        const char *member = set;
        while (*member != '\0' && *member != *iter)
            ++member;
        if (*member != '\0')
            break;              // current character is a member of |set|
        ++iter;
    }
    return (char *) iter;
 }
 // Scans backward for the last character that appears in |set|.  Note the
 // argument roles: |stop| is the low end of the range (the first character)
 // and |iter| is one past the last character to examine.  If no character
 // matches, the result is |stop - 1|.
 char *
 net_RFindCharInSet(const char *stop, const char *iter, const char *set)
 {
    const char *cursor = iter - 1;          // last character in the range
    const char *const floor = stop - 1;     // one before the first character
    while (cursor != floor) {
        const char *member = set;
        while (*member != '\0' && *member != *cursor)
            ++member;
        if (*member != '\0')
            break;              // found a member of |set|
        --cursor;
    }
    return (char *) cursor;
 }
 // Returns a pointer to the first character in [iter, stop) that does NOT
 // appear in |set|, or |stop| if every character in the range is in |set|.
 // The terminator of |set| is never treated as a member, so a null byte in
 // the input always ends the scan and its address is returned.
 //
 // An empty range (iter == stop) returns |stop| without reading any memory.
 char *
 net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
 {
    // test the bound before every read so that nothing at or beyond |stop|
    // is ever dereferenced.
    while (iter != stop) {
        const char *s = set;
        while (*s && *s != *iter)
            ++s;
        if (!*s)
            break;              // *iter is not a member of |set|
        ++iter;
    }
    return (char *) iter;
 }
 // Returns a pointer to the last character that does NOT appear in |set|.
 // Note the argument roles: |stop| is the low end of the range (the first
 // character) and |iter| is one past the last character to examine.  If
 // every character in the range is in |set|, the result is |stop - 1|.
 //
 // An empty range (stop == iter) returns |stop - 1| without reading any
 // memory.
 char *
 net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
 {
    --iter;
    --stop;
    // test the bound before every read so that nothing below the start of
    // the range is ever dereferenced.
    while (iter != stop) {
        const char *s = set;
        while (*s && *s != *iter)
            ++s;
        if (!*s)
            break;              // *iter is not a member of |set|
        --iter;
    }
    return (char *) iter;
 }
--- a/netwerk/base/src/nsURLHelper.h
+++ b/netwerk/base/src/nsURLHelper.h
@ -109,8 +109,61 @@ inline PRBool net_IsValidScheme(const nsAFlatCString &scheme)
    return net_IsValidScheme(scheme.get(), scheme.Length());
 }
-/* convert to lower case (XXX this needs to be factored out) */
+/*****************************************************************************
 * generic string routines follow (XXX move to someplace more generic).
 */
 /* convert to lower case */
 void net_ToLowerCase(char* str, PRUint32 length);
 void net_ToLowerCase(char* str);
 /**
 * returns pointer to first character of |str| in the given set.  if not found,
 * then |end| is returned.  stops prematurely if a null byte is encountered,
 * and returns the address of the null byte.
 */
 char *net_FindCharInSet(const char *str, const char *end, const char *set);
 /**
 * returns pointer to first character of |str| NOT in the given set.  if all
 * characters are in the given set, then |end| is returned.  if '\0' is not
 * included in |set|, then stops prematurely if a null byte is encountered,
 * and returns the address of the null byte.
 */
 char *net_FindCharNotInSet(const char *str, const char *end, const char *set);
 /**
 * returns pointer to last character of |str| in the given set.  if not found,
 * then |str - 1| is returned.
 */
 char *net_RFindCharInSet(const char *str, const char *end, const char *set);
 /**
 * returns pointer to last character of |str| NOT in the given set.  if all
 * characters are in the given set, then |str - 1| is returned.
 */
 char *net_RFindCharNotInSet(const char *str, const char *end, const char *set);
 /* inline versions */
 /* remember the 64-bit platforms ;-) */
 /* NET_MAX_ADDRESS is passed as the |end| argument by the two-argument
  * forward-search wrappers below.  It is formed by subtracting one from a
  * null char pointer; presumably this is meant to yield the highest possible
  * address regardless of pointer width.
  * NOTE(review): arithmetic on a null pointer is not guaranteed by the
  * language -- confirm the result on each supported toolchain. */
 #define NET_MAX_ADDRESS (((char*)0)-1)
 inline char *net_FindCharInSet(const char *str, const char *set)
 {
    return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
 }
 inline char *net_FindCharNotInSet(const char *str, const char *set)
 {
    return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
 }
 /* reverse search over a null-terminated string: the end of the range is
  * the address of the terminating null, computed with strlen. */
 inline char *net_RFindCharInSet(const char *str, const char *set)
 {
    const char *terminator = str + strlen(str);
    return net_RFindCharInSet(str, terminator, set);
 }
 /* reverse "not in set" search over a null-terminated string: the end of
  * the range is the address of the terminating null, computed with strlen. */
 inline char *net_RFindCharNotInSet(const char *str, const char *set)
 {
    const char *terminator = str + strlen(str);
    return net_RFindCharNotInSet(str, terminator, set);
 }
 #endif // !nsURLHelper_h__
--- a/netwerk/protocol/http/src/nsHttp.h
+++ b/netwerk/protocol/http/src/nsHttp.h
@ -33,6 +33,7 @@
 #include "prtime.h"
 #include "nsISupportsUtils.h"
 #include "nsPromiseFlatString.h"
 #include "nsURLHelper.h"
 #include "netCore.h"
 #if defined(PR_LOGGING)
@ -162,4 +163,6 @@ PRTimeToSeconds(PRTime t_usec)
 // round q-value to one decimal place; return most significant digit as uint.
 #define QVAL_TO_UINT(q) ((unsigned int) ((q + 0.05) * 10.0))
 #define HTTP_LWS " \t"
 #endif // nsHttp_h__
--- a/netwerk/protocol/http/src/nsHttpAuthCache.h
+++ b/netwerk/protocol/http/src/nsHttpAuthCache.h
@ -128,9 +128,9 @@ private:
                    const char *challenge,
                    const nsHttpAuthIdentity &ident,
                    nsISupports *metadata)
-        : mRealm(nsnull)
+        : mRoot(nsnull)
        , mRoot(nsnull)
        , mTail(nsnull)
        , mRealm(nsnull)
    {
        Set(path, realm, creds, challenge, ident, metadata);
    }
--- a/netwerk/protocol/http/src/nsHttpHandler.cpp
+++ b/netwerk/protocol/http/src/nsHttpHandler.cpp
@ -1145,9 +1145,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua
         token != (char *) 0;
         token = nsCRT::strtok(p, ",", &p))
    {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
        char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
        if (trim != (char*)0)  // remove "; q=..." if present
            *trim = '\0';
@ -1243,9 +1243,9 @@ PrepareAcceptCharsets(const char *i_AcceptCharset, nsACString &o_AcceptCharset)
    for (token = nsCRT::strtok(o_Accept, ",", &p);
         token != (char *) 0;
         token = nsCRT::strtok(p, ",", &p)) {
-        while (*token == ' ' || *token == '\x9') token++;
+        token = net_FindCharNotInSet(token, HTTP_LWS);
        char* trim;
-        trim = PL_strpbrk(token, "; \x9");
+        trim = net_FindCharInSet(token, ";" HTTP_LWS);
        if (trim != (char*)0)  // remove "; q=..." if present
            *trim = '\0';
--- a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
+++ b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp
@ -126,48 +126,57 @@ nsHttpHeaderArray::VisitHeaders(nsIHttpHeaderVisitor *visitor)
 void
 nsHttpHeaderArray::ParseHeaderLine(char *line, nsHttpAtom *hdr, char **val)
 {
-    char *p = PL_strchr(line, ':'), *p2;
+    //
    // Augmented BNF (from section 4.2 of RFC 2616 w/ modifications):
    //
    //   message-header = field-name field-sep [ field-value ]
    //   field-name     = token
    //   field-sep      = LWS ( ":" | "=" | SP | HT )
    //   field-value    = *( field-content | LWS )
    //   field-content  = <the OCTETs making up the field-value
    //                     and consisting of either *TEXT or combinations
    //                     of token, separators, and quoted-string>
    //
    // Here, we allow a greater set of possible header value separators
    // for compatibility with the vast number of broken web servers (mostly
    // lame CGI scripts).  NN4 and IE are similarly tolerant.
    //
    //
    // Examples:
    //  
    //   Header: Value
    //   Header :Value
    //   Header Value
    //   Header=Value
    //
-    // the header is malformed... but, there are malformed headers in the
+    char *p = (char *) strchr(line, ':');
-    // world.  search for ' ' and '\t' to simulate 4.x/IE behavior.
+    if (!p)
-    if (!p) {
+        p = net_FindCharInSet(line, " \t=");
        p = PL_strchr(line, ' ');
        if (!p) {
            p = PL_strchr(line, '\t');
            if (!p) {
                // some broken cgi scripts even use '=' as a delimiter!!
                p = PL_strchr(line, '=');
            }
        }
    }
    if (p) {
        // ignore whitespace between header name and colon
-        p2 = p;
+        char *p2 = net_FindCharInSet(line, p, HTTP_LWS);
-        while (--p2 >= line && ((*p2 == ' ') || (*p2 == '\t')))
+        *p2 = 0; // null terminate header name
            ;
        *++p2= 0; // overwrite first char after header name
        nsHttpAtom atom = nsHttp::ResolveAtom(line);
        if (atom) {
            // skip over whitespace
-            do {
+            p = net_FindCharNotInSet(++p, HTTP_LWS);
                ++p;
            } while ((*p == ' ') || (*p == '\t'));
            // trim trailing whitespace - bug 86608
-            p2 = p + PL_strlen(p);
+            p2 = net_RFindCharNotInSet(p, HTTP_LWS);
-            do {
+            *++p2 = 0; // null terminate header value; if all chars
-                --p2;
+                       // starting at |p| consisted of LWS, then p2
-            } while (p2 >= p && ((*p2 == ' ') || (*p2 == '\t')));
+                       // would have pointed at |p-1|, so the prefix
-            *++p2 = 0;
+                       // increment is always valid.
            // assign return values
            if (hdr) *hdr = atom;
            if (val) *val = p;
            // assign response header
-            SetHeader(atom, nsDependentCString(p), PR_TRUE);
+            SetHeader(atom, nsDependentCString(p, p2 - p), PR_TRUE);
        }
        else
            LOG(("unknown header; skipping\n"));
--- a/netwerk/protocol/http/src/nsHttpResponseHead.cpp
+++ b/netwerk/protocol/http/src/nsHttpResponseHead.cpp
@ -300,8 +300,6 @@ nsHttpResponseHead::ComputeFreshnessLifetime(PRUint32 *result)
 PRBool
 nsHttpResponseHead::MustValidate()
 {
    const char *val;
    LOG(("nsHttpResponseHead::MustValidate ??\n"));
    // The no-cache response header indicates that we must validate this
@ -552,63 +550,91 @@ nsHttpResponseHead::ParseVersion(const char *str)
        mVersion = NS_HTTP_VERSION_1_0;
 }
 // This code is duplicated in nsMultiMixedConv.cpp.  If you change it
 // here, change it there, too!
 void
 nsHttpResponseHead::ParseContentType(char *type)
 {
    LOG(("nsHttpResponseHead::ParseContentType [type=%s]\n", type));
-    // don't bother with an empty content-type header - bug 83465
+    //
-    if (!*type)
+    // Augmented BNF (from RFC 2616 section 3.7):
-        return;
+    //
    //   header-value = media-type *( LWS "," LWS media-type )
    //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
    //   type         = token
    //   subtype      = token
    //   parameter    = attribute "=" value
    //   attribute    = token
    //   value        = token | quoted-string
    //   
    //
    // Examples:
    //
    //   text/html
    //   text/html, text/html
    //   text/html,text/html; charset=ISO-8859-1
    //   text/html;charset=ISO-8859-1, text/html
    //   application/octet-stream
    //
-    // a response could have multiple content type headers... 
+    // iterate over media-types
-    // we'll honor the last one. But for charset, we will only 
+    char *nextType;
-    // honor the last one that comes with charset. 
+    do {
-    mContentType.Truncate();
+        nextType = (char *) strchr(type, ',');
-
+        if (nextType) {
-    // we don't care about comments (although they are invalid here)
+            *nextType = '\0';
-    char *p = (char *) strchr(type, '(');
+            ++nextType;
    if (p)
        *p = 0;
    // check if the content-type has additional fields...
    if ((p = (char *) strchr(type, ';')) != nsnull) {
        char *p2, *p3;
        // is there a charset field?
        if ((p2 = PL_strcasestr(p, "charset=")) != nsnull) {
            p2 += 8;
            // check end of charset parameter
            if ((p3 = (char *) strchr(p2, ';')) == nsnull)
                p3 = p2 + strlen(p2);
            // trim any trailing whitespace
            do {
                --p3;
            } while ((*p3 == ' ') || (*p3 == '\t'));
            *++p3 = 0; // overwrite first char after the charset field
            mContentCharset = p2;
        }
-    }
+        // type points at this media-type; locate first parameter if any
-    else
+        char *charset = "";
-        p = type + strlen(type);
+        char *param = (char *) strchr(type, ';');
        if (param) {
            *param = '\0';
            ++param;
-    // trim any trailing whitespace
+            // iterate over parameters
-    while (--p >= type && ((*p == ' ') || (*p == '\t')))
+            char *nextParam;
-        ;
+            do {
-    *++p = 0; // overwrite first char after the media type
+                nextParam = (char *) strchr(param, ';');
                if (nextParam) {
                    *nextParam = '\0';
                    ++nextParam;
                }
                // param points at this parameter
-    // force the content-type to lowercase
+                param = net_FindCharNotInSet(param, HTTP_LWS);
-    while (--p >= type)
+                if (PL_strncasecmp(param, "charset=", 8) == 0)
-        *p = nsCRT::ToLower(*p);
+                    charset = param + 8;
-    // If the server sent "*/*", it is meaningless, so do not store it.
+            } while ((param = nextParam) != nsnull);
-    if (PL_strcmp(type, "*/*"))
+        }
-        mContentType = type;
+
        // trim leading and trailing LWS from type and charset.
        // charset cannot have leading whitespace.  we include '(' in the
        // trailing trim set to catch media-type comments, which are not
        // at all standard, but may occur in rare cases.
        type = net_FindCharNotInSet(type, HTTP_LWS);
        char *typeEnd    = net_FindCharInSet(type,    HTTP_LWS "(");
        char *charsetEnd = net_FindCharInSet(charset, HTTP_LWS "(");
        // force content-type to be lowercase
        net_ToLowerCase(type, typeEnd - type);
        // if the server sent "*/*", it is meaningless, so do not store it.
        // also, if type is the same as mContentType, then just update the
        // charset.  however, if charset is empty and mContentType hasn't
        // changed, then don't wipe-out an existing mContentCharset.
        if (*type && strcmp(type, "*/*") != 0) {
            PRBool eq = mContentType.Equals(Substring(type, typeEnd));
            if (!eq)
                mContentType.Assign(type, typeEnd - type);
            if (!eq || *charset)
                mContentCharset.Assign(charset, charsetEnd - charset);
        }
    } while ((type = nextType) != nsnull);
 }
 void