Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab

* net_ExtractURLScheme now uses mozilla::Tokenizer
* net_FilterURIString also filters characters in the scheme now
* removed startPos and endPos parameters for net_ExtractURLScheme and introduced net_IsAbsoluteURL
2016-02-05 14:45:08 +01:00 · 2016-02-05 14:45:08 +01:00 · 82a16a09cb
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@ -564,7 +564,7 @@ nsIOService::GetProtocolHandler(const char* scheme, nsIProtocolHandler* *result)
 NS_IMETHODIMP
 nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
 {
-    return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+    return net_ExtractURLScheme(inURI, scheme);
 }
 NS_IMETHODIMP 
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@ -2867,21 +2867,9 @@ nsStandardURL::Init(uint32_t urlType,
        mOriginCharset = charset;
    }
-    if (baseURI) {
+    if (baseURI && net_IsAbsoluteURL(spec)) {
        uint32_t start, end;
        // pull out the scheme and where it ends
        nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
        if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
            nsACString::const_iterator slash;
            spec.BeginReading(slash);
            slash.advance(end+1);
            // then check if // follows
            // if it follows, aSpec is really absolute ... 
            // ignore aBaseURI in this case
            if (*slash == '/' && *(++slash) == '/')
        baseURI = nullptr;
    }
    }
    if (!baseURI)
        return SetSpec(spec);
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@ -14,6 +14,7 @@
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
 #include "mozilla/Tokenizer.h"
 using namespace mozilla;
@ -180,11 +181,11 @@ net_ParseFileURL(const nsACString &inURL,
    const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
    const char *url = flatURL.get();
-    uint32_t schemeBeg, schemeEnd;
+    nsAutoCString scheme;
-    rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+    rv = net_ExtractURLScheme(flatURL, scheme);
    if (NS_FAILED(rv)) return rv;
-    if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+    if (!scheme.EqualsLiteral("file")) {
        NS_ERROR("must be a file:// url");
        return NS_ERROR_UNEXPECTED;
    }
@ -483,57 +484,55 @@ net_ResolveRelativePath(const nsACString &relativePath,
 // scheme fu
 //----------------------------------------------------------------------------
 #if !defined(MOZILLA_XPCOMRT_API)
 // Thin wrapper that forwards to nsCRT::IsAsciiAlpha.  In this file it is
 // handed by name to Tokenizer::CheckChar() to test the first character of a
 // scheme.
 static bool isAsciiAlpha(char c) {
    return nsCRT::IsAsciiAlpha(c);
 }
 // Tells whether aChar is allowed inside a URL scheme: an ASCII letter, an
 // ASCII digit, or one of the punctuation characters '+', '.' and '-'.
 static bool
 net_IsValidSchemeChar(const char aChar)
 {
    const bool isAlphaNumeric =
        nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar);
    const bool isSchemePunctuation =
        aChar == '+' || aChar == '.' || aChar == '-';
    return isAlphaNumeric || isSchemePunctuation;
 }
 #endif
 /* Extract URI-Scheme if possible */
 nsresult
 net_ExtractURLScheme(const nsACString &inURI,
-                     uint32_t *startPos, 
+                     nsACString& scheme)
                     uint32_t *endPos,
                     nsACString *scheme)
 {
-    // search for something up to a colon, and call it the scheme
+#if defined(MOZILLA_XPCOMRT_API)
-    const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
+    NS_WARNING("net_ExtractURLScheme not implemented");
-    const char* uri_start = flatURI.get();
+    return NS_ERROR_NOT_IMPLEMENTED;
-    const char* uri = uri_start;
+#else
    Tokenizer p(inURI, "\r\n\t");
-    if (!uri)
+    while (p.CheckWhite() || p.CheckChar(' ')) {
        // Skip leading whitespace
    }
    p.Record();
    if (!p.CheckChar(isAsciiAlpha)) {
        // First char must be alpha
        return NS_ERROR_MALFORMED_URI;
    // skip leading white space
    while (nsCRT::IsAsciiSpace(*uri))
        uri++;
    uint32_t start = uri - uri_start;
    if (startPos) {
        *startPos = start;
    }
-    uint32_t length = 0;
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
-    char c;
+        // Skip valid scheme characters or \r\n\t
    while ((c = *uri++) != '\0') {
        // First char must be Alpha
        if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
            length++;
        } 
        // Next chars can be alpha + digit + some special chars
        else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
                 nsCRT::IsAsciiDigit(c) || c == '+' || 
                 c == '.' || c == '-')) {
            length++;
        }
        // stop if colon reached but not as first char
        else if (c == ':' && length > 0) {
            if (endPos) {
                *endPos = start + length;
    }
-            if (scheme)
+    if (!p.CheckChar(':')) {
-                scheme->Assign(Substring(inURI, start, length));
+        return NS_ERROR_MALFORMED_URI;
    }
    p.Claim(scheme);
    scheme.StripChars("\r\n\t");
    return NS_OK;
-        }
+#endif
        else 
            break;
    }
    return NS_ERROR_MALFORMED_URI;
 }
 bool
@ -556,87 +555,73 @@ net_IsValidScheme(const char *scheme, uint32_t schemeLen)
    return true;
 }
 // Returns true only when |uri| has the shape "scheme://...": an ASCII letter,
 // then any number of valid scheme characters, then ':' followed by two '/'.
 //
 // Leading whitespace (including ' ') is skipped first.  \r, \n and \t are
 // also tolerated between the scheme characters, between the ':' and the
 // first '/', and between the two slashes.  A spec that has a scheme but no
 // "//" after the colon yields false.
 //
 // When MOZILLA_XPCOMRT_API is defined the whole check is compiled out and
 // the function always returns false.
 bool
 net_IsAbsoluteURL(const nsACString& uri)
 {
 #if !defined(MOZILLA_XPCOMRT_API)
    // "\r\n\t" is presumably the set of characters CheckWhite()/SkipWhites()
    // treat as whitespace -- confirm against mozilla/Tokenizer.h.
    Tokenizer p(uri, "\r\n\t");
    // The Check* calls consume what they match, so the loop bodies are empty.
    while (p.CheckWhite() || p.CheckChar(' ')) {
        // Skip leading whitespace
    }
    // First char must be alpha
    if (!p.CheckChar(isAsciiAlpha)) {
        return false;
    }
    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
        // Skip valid scheme characters or \r\n\t
    }
    // The scheme must be terminated by a colon.
    if (!p.CheckChar(':')) {
        return false;
    }
    // Require "//" after the colon, ignoring \r\n\t around each slash.
    p.SkipWhites();
    if (!p.CheckChar('/')) {
        return false;
    }
    p.SkipWhites();
    if (p.CheckChar('/')) {
        // "scheme://" was found: the spec is absolute.
        return true;
    }
 #endif
    return false;
 }
 bool
 net_FilterURIString(const char *str, nsACString& result)
 {
    NS_PRECONDITION(str, "Must have a non-null string!");
    bool writing = false;
    result.Truncate();
    const char *p = str;
-    // Remove leading spaces, tabs, CR, LF if any.
+    // Figure out if we need to filter anything.
-    while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
+    bool writing = false;
        writing = true;
        str = p + 1;
        p++;
    }
    // Don't strip from the scheme, because other code assumes everything
    // up to the ':' is the scheme, and it's bad not to have it match.
    // If there's no ':', strip.
    bool found_colon = false;
    const char *first = nullptr;
    while (*p) {
-        switch (*p) {
+        if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
            case '\t': 
            case '\r': 
            case '\n':
                if (found_colon) {
            writing = true;
                    // append chars up to but not including *p
                    if (p > str)
                        result.Append(str, p - str);
                    str = p + 1;
                } else {
                    // remember where the first \t\r\n was in case we find no scheme
                    if (!first)
                        first = p;
                }
                break;
            case ':':
                found_colon = true;
                break;
            case '/':
            case '@':
                if (!found_colon) {
                    // colon also has to precede / or @ to be a scheme
                    found_colon = true; // not really, but means ok to strip
                    if (first) {
                        // go back and replace
                        p = first;
                        continue; // process *p again
                    }
                }
                break;
            default:
            break;
        }
        p++;
        // At end, if there was no scheme, and we hit a control char, fix
        // it up now.
        if (!*p && first != nullptr && !found_colon) {
            // TRICKY - to avoid duplicating code, we reset the loop back
            // to the point we found something to do
            p = first;
            // This also stops us from looping after we finish
            found_colon = true; // so we'll replace \t\r\n
        }
    }
-    // Remove trailing spaces if any
+    if (!writing) {
-    while (((p-1) >= str) && (*(p-1) == ' ')) {
+        // Nothing to strip or filter
-        writing = true;
+        return false;
        p--;
    }
-    if (writing && p > str)
+    nsAutoCString temp;
        result.Append(str, p - str);
-    return writing;
+    temp.Assign(str);
    temp.Trim("\r\n\t ");
    temp.StripChars("\r\n\t");
    result.Assign(temp);
    return true;
 }
 #if defined(XP_WIN)
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@ -79,18 +79,22 @@ nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                             const nsACString &basePath,
                                             nsACString &result);
 /**
 * Check if a URL is absolute
 *
 * @param inURL     URL spec
 * @return true if the given spec represents an absolute URL
 */
 bool net_IsAbsoluteURL(const nsACString& inURL);
 /**
 * Extract URI-Scheme if possible
 *
 * @param inURI     URI spec
- * @param startPos  start of scheme (may be null)
- * @param endPos    end of scheme; index of colon (may be null)
 * @param scheme    scheme copied to this buffer on return (may be null)
 */
 nsresult net_ExtractURLScheme(const nsACString &inURI,
-                                          uint32_t *startPos, 
+                              nsACString &scheme);
                                          uint32_t *endPos,
                                          nsACString *scheme = nullptr);
 /* check that the given scheme conforms to RFC 2396 */
 bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
@ -109,8 +113,7 @@ inline bool net_IsValidScheme(const nsAFlatCString &scheme)
 * This function strips out all whitespace at the beginning and end of the URL
 * and strips out \r, \n, \t from the middle of the URL.  This makes it safe to
 * call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded.  Note that stripping does not
+ * into whitespace that is not properly encoded.
- * occur in the scheme portion itself.
 *
 * @param str the pointer to the string to filter.  Must be non-null.
 * @param result the out param to write to if filtering happens
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@ -344,7 +344,7 @@ nsresult
 Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
 {
  nsAutoCString scheme;
-  nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+  nsresult rv = net_ExtractURLScheme(origin, scheme);
  NS_ENSURE_SUCCESS(rv, rv);
  return MakeOriginURL(scheme, origin, url);
 }
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@ -54,7 +54,7 @@ SubstitutingURL::EnsureFile()
    return rv;
  nsAutoCString scheme;
-  rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+  rv = net_ExtractURLScheme(spec, scheme);
  if (NS_FAILED(rv))
    return rv;
--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@ -760,8 +760,10 @@ nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
    // for some protocols, we expect the location to be absolute.
    // if so, and if the location indeed appears to be a valid URI, then go
    // ahead and treat it like one.
    nsAutoCString scheme;
    if (mExpectAbsLoc &&
-        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+        NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
        // escape as absolute 
        escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
    }
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@ -300,3 +300,9 @@ add_test(function test_hugeStringThrows()
  run_next_test();
 });
 // Checks whitespace filtering when a URL is parsed: leading and trailing
 // whitespace is dropped, \r, \n and \t are removed everywhere (including
 // inside the scheme), and the inner space is kept and escaped as %20.
 add_test(function test_filterWhitespace()
 {
  var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
  do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
  // A test registered with add_test() has to hand control back explicitly,
  // as the preceding test does; otherwise the queue never advances.
  run_next_test();
 });