From f3076566940a76b02527b5308acf08f236f0f599 Mon Sep 17 00:00:00 2001 From: "darin%netscape.com" Date: Wed, 18 Jun 2003 23:16:17 +0000 Subject: [PATCH] fixes bug 208845 "multiple content-type headers combined breaks mozilla" r=dougt sr=alecf --- netwerk/base/src/nsURLHelper.cpp | 56 ++++++++ netwerk/base/src/nsURLHelper.h | 55 +++++++- netwerk/protocol/http/src/nsHttp.h | 3 + netwerk/protocol/http/src/nsHttpAuthCache.h | 4 +- netwerk/protocol/http/src/nsHttpHandler.cpp | 8 +- .../protocol/http/src/nsHttpHeaderArray.cpp | 61 +++++---- .../protocol/http/src/nsHttpResponseHead.cpp | 124 +++++++++++------- 7 files changed, 229 insertions(+), 82 deletions(-) diff --git a/netwerk/base/src/nsURLHelper.cpp b/netwerk/base/src/nsURLHelper.cpp index 9a9bcb654d2..f67251758fc 100644 --- a/netwerk/base/src/nsURLHelper.cpp +++ b/netwerk/base/src/nsURLHelper.cpp @@ -544,3 +544,59 @@ net_ToLowerCase(char *str) for (; *str; ++str) ToLower(*str); } + +char * +net_FindCharInSet(const char *iter, const char *stop, const char *set) +{ + for (; iter != stop && *iter; ++iter) { + for (const char *s = set; *s; ++s) { + if (*iter == *s) + return (char *) iter; + } + } + return (char *) iter; +} + +char * +net_RFindCharInSet(const char *stop, const char *iter, const char *set) +{ + --iter; + --stop; + for (; iter != stop; --iter) { + for (const char *s = set; *s; ++s) { + if (*iter == *s) + return (char *) iter; + } + } + return (char *) iter; +} + +char * +net_FindCharNotInSet(const char *iter, const char *stop, const char *set) +{ +repeat: + for (const char *s = set; *s; ++s) { + if (*iter == *s) { + if (++iter == stop) + break; + goto repeat; + } + } + return (char *) iter; +} + +char * +net_RFindCharNotInSet(const char *stop, const char *iter, const char *set) +{ + --iter; + --stop; +repeat: + for (const char *s = set; *s; ++s) { + if (*iter == *s) { + if (--iter == stop) + break; + goto repeat; + } + } + return (char *) iter; +} diff --git a/netwerk/base/src/nsURLHelper.h 
b/netwerk/base/src/nsURLHelper.h index 236939771bb..e366ce0bd2f 100644 --- a/netwerk/base/src/nsURLHelper.h +++ b/netwerk/base/src/nsURLHelper.h @@ -109,8 +109,61 @@ inline PRBool net_IsValidScheme(const nsAFlatCString &scheme) return net_IsValidScheme(scheme.get(), scheme.Length()); } -/* convert to lower case (XXX this needs to be factored out) */ +/***************************************************************************** + * generic string routines follow (XXX move to someplace more generic). + */ + +/* convert to lower case */ void net_ToLowerCase(char* str, PRUint32 length); void net_ToLowerCase(char* str); +/** + * returns pointer to first character of |str| in the given set. if not found, + * then |end| is returned. stops prematurely if a null byte is encountered, + * and returns the address of the null byte. + */ +char *net_FindCharInSet(const char *str, const char *end, const char *set); + +/** + * returns pointer to first character of |str| NOT in the given set. if all + * characters are in the given set, then |end| is returned. if '\0' is not + * included in |set|, then stops prematurely if a null byte is encountered, + * and returns the address of the null byte. + */ +char *net_FindCharNotInSet(const char *str, const char *end, const char *set); + +/** + * returns pointer to last character of |str| in the given set. if not found, + * then |str - 1| is returned. + */ +char *net_RFindCharInSet(const char *str, const char *end, const char *set); + +/** + * returns pointer to last character of |str| NOT in the given set. if all + * characters are in the given set, then |str - 1| is returned. 
+ */ +char *net_RFindCharNotInSet(const char *str, const char *end, const char *set); + +/* inline versions */ + +/* remember the 64-bit platforms ;-) */ +#define NET_MAX_ADDRESS (((char*)0)-1) + +inline char *net_FindCharInSet(const char *str, const char *set) +{ + return net_FindCharInSet(str, NET_MAX_ADDRESS, set); +} +inline char *net_FindCharNotInSet(const char *str, const char *set) +{ + return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set); +} +inline char *net_RFindCharInSet(const char *str, const char *set) +{ + return net_RFindCharInSet(str, str + strlen(str), set); +} +inline char *net_RFindCharNotInSet(const char *str, const char *set) +{ + return net_RFindCharNotInSet(str, str + strlen(str), set); +} + #endif // !nsURLHelper_h__ diff --git a/netwerk/protocol/http/src/nsHttp.h b/netwerk/protocol/http/src/nsHttp.h index 3068f2aff69..a0e5d49fcad 100644 --- a/netwerk/protocol/http/src/nsHttp.h +++ b/netwerk/protocol/http/src/nsHttp.h @@ -33,6 +33,7 @@ #include "prtime.h" #include "nsISupportsUtils.h" #include "nsPromiseFlatString.h" +#include "nsURLHelper.h" #include "netCore.h" #if defined(PR_LOGGING) @@ -162,4 +163,6 @@ PRTimeToSeconds(PRTime t_usec) // round q-value to one decimal place; return most significant digit as uint. 
#define QVAL_TO_UINT(q) ((unsigned int) ((q + 0.05) * 10.0)) +#define HTTP_LWS " \t" + #endif // nsHttp_h__ diff --git a/netwerk/protocol/http/src/nsHttpAuthCache.h b/netwerk/protocol/http/src/nsHttpAuthCache.h index bbdfaf48d8e..dd5b8a02571 100644 --- a/netwerk/protocol/http/src/nsHttpAuthCache.h +++ b/netwerk/protocol/http/src/nsHttpAuthCache.h @@ -128,9 +128,9 @@ private: const char *challenge, const nsHttpAuthIdentity &ident, nsISupports *metadata) - : mRealm(nsnull) - , mRoot(nsnull) + : mRoot(nsnull) , mTail(nsnull) + , mRealm(nsnull) { Set(path, realm, creds, challenge, ident, metadata); } diff --git a/netwerk/protocol/http/src/nsHttpHandler.cpp b/netwerk/protocol/http/src/nsHttpHandler.cpp index 3101aa57ed5..cb5760be181 100644 --- a/netwerk/protocol/http/src/nsHttpHandler.cpp +++ b/netwerk/protocol/http/src/nsHttpHandler.cpp @@ -1145,9 +1145,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua token != (char *) 0; token = nsCRT::strtok(p, ",", &p)) { - while (*token == ' ' || *token == '\x9') token++; + token = net_FindCharNotInSet(token, HTTP_LWS); char* trim; - trim = PL_strpbrk(token, "; \x9"); + trim = net_FindCharInSet(token, ";" HTTP_LWS); if (trim != (char*)0) // remove "; q=..." if present *trim = '\0'; @@ -1243,9 +1243,9 @@ PrepareAcceptCharsets(const char *i_AcceptCharset, nsACString &o_AcceptCharset) for (token = nsCRT::strtok(o_Accept, ",", &p); token != (char *) 0; token = nsCRT::strtok(p, ",", &p)) { - while (*token == ' ' || *token == '\x9') token++; + token = net_FindCharNotInSet(token, HTTP_LWS); char* trim; - trim = PL_strpbrk(token, "; \x9"); + trim = net_FindCharInSet(token, ";" HTTP_LWS); if (trim != (char*)0) // remove "; q=..." 
if present *trim = '\0'; diff --git a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp index 69cebe3f6ce..8c78c55e3a3 100644 --- a/netwerk/protocol/http/src/nsHttpHeaderArray.cpp +++ b/netwerk/protocol/http/src/nsHttpHeaderArray.cpp @@ -126,48 +126,57 @@ nsHttpHeaderArray::VisitHeaders(nsIHttpHeaderVisitor *visitor) void nsHttpHeaderArray::ParseHeaderLine(char *line, nsHttpAtom *hdr, char **val) { - char *p = PL_strchr(line, ':'), *p2; + // + // Augmented BNF (from section 4.2 of RFC 2616 w/ modifications): + // + // message-header = field-name field-sep [ field-value ] + // field-name = token + // field-sep = LWS ( ":" | "=" | SP | HT ) + // field-value = *( field-content | LWS ) + // field-content = <the OCTETs making up the field-value + // and consisting of either *TEXT or combinations + // of token, separators, and quoted-string> + // + // Here, we allow a greater set of possible header value separators + // for compatibility with the vast number of broken web servers (mostly + // lame CGI scripts). NN4 and IE are similarly tolerant. + // + // + // Examples: + // + // Header: Value + // Header :Value + // Header Value + // Header=Value + // - // the header is malformed... but, there are malformed headers in the - // world. search for ' ' and '\t' to simulate 4.x/IE behavior. - if (!p) { - p = PL_strchr(line, ' '); - if (!p) { - p = PL_strchr(line, '\t'); - if (!p) { - // some broken cgi scripts even use '=' as a delimiter!! 
- p = PL_strchr(line, '='); - } - } - } + char *p = (char *) strchr(line, ':'); + if (!p) + p = net_FindCharInSet(line, " \t="); if (p) { // ignore whitespace between header name and colon - p2 = p; - while (--p2 >= line && ((*p2 == ' ') || (*p2 == '\t'))) - ; - *++p2= 0; // overwrite first char after header name + char *p2 = net_FindCharInSet(line, p, HTTP_LWS); + *p2 = 0; // null terminate header name nsHttpAtom atom = nsHttp::ResolveAtom(line); if (atom) { // skip over whitespace - do { - ++p; - } while ((*p == ' ') || (*p == '\t')); + p = net_FindCharNotInSet(++p, HTTP_LWS); // trim trailing whitespace - bug 86608 - p2 = p + PL_strlen(p); - do { - --p2; - } while (p2 >= p && ((*p2 == ' ') || (*p2 == '\t'))); - *++p2 = 0; + p2 = net_RFindCharNotInSet(p, HTTP_LWS); + *++p2 = 0; // null terminate header value; if all chars + // starting at |p| consisted of LWS, then p2 + // would have pointed at |p-1|, so the prefix + // increment is always valid. // assign return values if (hdr) *hdr = atom; if (val) *val = p; // assign response header - SetHeader(atom, nsDependentCString(p), PR_TRUE); + SetHeader(atom, nsDependentCString(p, p2 - p), PR_TRUE); } else LOG(("unknown header; skipping\n")); diff --git a/netwerk/protocol/http/src/nsHttpResponseHead.cpp b/netwerk/protocol/http/src/nsHttpResponseHead.cpp index 162fade9df5..b4ea9a9c238 100644 --- a/netwerk/protocol/http/src/nsHttpResponseHead.cpp +++ b/netwerk/protocol/http/src/nsHttpResponseHead.cpp @@ -300,8 +300,6 @@ nsHttpResponseHead::ComputeFreshnessLifetime(PRUint32 *result) PRBool nsHttpResponseHead::MustValidate() { - const char *val; - LOG(("nsHttpResponseHead::MustValidate ??\n")); // The no-cache response header indicates that we must validate this @@ -552,63 +550,91 @@ nsHttpResponseHead::ParseVersion(const char *str) mVersion = NS_HTTP_VERSION_1_0; } -// This code is duplicated in nsMultiMixedConv.cpp. If you change it -// here, change it there, too! 
- void nsHttpResponseHead::ParseContentType(char *type) { LOG(("nsHttpResponseHead::ParseContentType [type=%s]\n", type)); - // don't bother with an empty content-type header - bug 83465 - if (!*type) - return; + // + // Augmented BNF (from RFC 2616 section 3.7): + // + // header-value = media-type *( LWS "," LWS media-type ) + // media-type = type "/" subtype *( LWS ";" LWS parameter ) + // type = token + // subtype = token + // parameter = attribute "=" value + // attribute = token + // value = token | quoted-string + // + // + // Examples: + // + // text/html + // text/html, text/html + // text/html,text/html; charset=ISO-8859-1 + // text/html;charset=ISO-8859-1, text/html + // application/octet-stream + // - // a response could have multiple content type headers... - // we'll honor the last one. But for charset, we will only - // honor the last one that comes with charset. - mContentType.Truncate(); - - // we don't care about comments (although they are invalid here) - char *p = (char *) strchr(type, '('); - if (p) - *p = 0; - - // check if the content-type has additional fields... - if ((p = (char *) strchr(type, ';')) != nsnull) { - char *p2, *p3; - // is there a charset field? 
- if ((p2 = PL_strcasestr(p, "charset=")) != nsnull) { - p2 += 8; - - // check end of charset parameter - if ((p3 = (char *) strchr(p2, ';')) == nsnull) - p3 = p2 + strlen(p2); - - // trim any trailing whitespace - do { - --p3; - } while ((*p3 == ' ') || (*p3 == '\t')); - *++p3 = 0; // overwrite first char after the charset field - - mContentCharset = p2; + // iterate over media-types + char *nextType; + do { + nextType = (char *) strchr(type, ','); + if (nextType) { + *nextType = '\0'; + ++nextType; } - } - else - p = type + strlen(type); + // type points at this media-type; locate first parameter if any + char *charset = ""; + char *param = (char *) strchr(type, ';'); + if (param) { + *param = '\0'; + ++param; - // trim any trailing whitespace - while (--p >= type && ((*p == ' ') || (*p == '\t'))) - ; - *++p = 0; // overwrite first char after the media type + // iterate over parameters + char *nextParam; + do { + nextParam = (char *) strchr(param, ';'); + if (nextParam) { + *nextParam = '\0'; + ++nextParam; + } + // param points at this parameter - // force the content-type to lowercase - while (--p >= type) - *p = nsCRT::ToLower(*p); + param = net_FindCharNotInSet(param, HTTP_LWS); + if (PL_strncasecmp(param, "charset=", 8) == 0) + charset = param + 8; - // If the server sent "*/*", it is meaningless, so do not store it. - if (PL_strcmp(type, "*/*")) - mContentType = type; + } while ((param = nextParam) != nsnull); + } + + // trim LWS leading and trailing whitespace from type and charset. + // charset cannot have leading whitespace. we include '(' in the + // trailing trim set to catch media-type comments, which are not + // at all standard, but may occur in rare cases. 
+ + type = net_FindCharNotInSet(type, HTTP_LWS); + + char *typeEnd = net_FindCharInSet(type, HTTP_LWS "("); + char *charsetEnd = net_FindCharInSet(charset, HTTP_LWS "("); + + // force content-type to be lowercase + net_ToLowerCase(type, typeEnd - type); + + // if the server sent "*/*", it is meaningless, so do not store it. + // also, if type is the same as mContentType, then just update the + // charset. however, if charset is empty and mContentType hasn't + // changed, then don't wipe-out an existing mContentCharset. + + if (*type && strcmp(type, "*/*") != 0) { + PRBool eq = mContentType.Equals(Substring(type, typeEnd)); + if (!eq) + mContentType.Assign(type, typeEnd - type); + if (!eq || *charset) + mContentCharset.Assign(charset, charsetEnd - charset); + } + + } while ((type = nextType) != nsnull); } void