Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab

* net_ExtractURLScheme now uses mozilla::Tokenizer * net_FilterURIString also filters characters in the scheme now * removed startPos and endPos parameters for net_FilterURIString and introduced net_IsAbsoluteURL
2016-02-05 14:45:08 +01:00 · 2016-02-05 14:45:08 +01:00 · 82a16a09cb
--- a/netwerk/base/nsIOService.cpp
+++ b/netwerk/base/nsIOService.cpp
@ -564,7 +564,7 @@ nsIOService::GetProtocolHandler(const char* scheme, nsIProtocolHandler* *result)
 NS_IMETHODIMP
 nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
 {
-    return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
+    return net_ExtractURLScheme(inURI, scheme);
 }

 NS_IMETHODIMP 
--- a/netwerk/base/nsStandardURL.cpp
+++ b/netwerk/base/nsStandardURL.cpp
@ -2867,20 +2867,8 @@ nsStandardURL::Init(uint32_t urlType,
        mOriginCharset = charset;
    }

-    if (baseURI) {
-        uint32_t start, end;
-        // pull out the scheme and where it ends
-        nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
-        if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
-            nsACString::const_iterator slash;
-            spec.BeginReading(slash);
-            slash.advance(end+1);
-            // then check if // follows
-            // if it follows, aSpec is really absolute ... 
-            // ignore aBaseURI in this case
-            if (*slash == '/' && *(++slash) == '/')
-                baseURI = nullptr;
-        }
+    if (baseURI && net_IsAbsoluteURL(spec)) {
+        baseURI = nullptr;
    }

    if (!baseURI)
--- a/netwerk/base/nsURLHelper.cpp
+++ b/netwerk/base/nsURLHelper.cpp
@ -14,6 +14,7 @@
 #include "nsNetCID.h"
 #include "mozilla/Preferences.h"
 #include "prnetdb.h"
+#include "mozilla/Tokenizer.h"

 using namespace mozilla;

@ -179,12 +180,12 @@ net_ParseFileURL(const nsACString &inURL,

    const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
    const char *url = flatURL.get();
-    
-    uint32_t schemeBeg, schemeEnd;
-    rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
+
+    nsAutoCString scheme;
+    rv = net_ExtractURLScheme(flatURL, scheme);
    if (NS_FAILED(rv)) return rv;

-    if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
+    if (!scheme.EqualsLiteral("file")) {
        NS_ERROR("must be a file:// url");
        return NS_ERROR_UNEXPECTED;
    }
@ -483,57 +484,55 @@ net_ResolveRelativePath(const nsACString &relativePath,
 // scheme fu
 //----------------------------------------------------------------------------

+#if !defined(MOZILLA_XPCOMRT_API)
+static bool isAsciiAlpha(char c) {
+    return nsCRT::IsAsciiAlpha(c);
+}
+
+static bool
+net_IsValidSchemeChar(const char aChar)
+{
+    if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
+        aChar == '+' || aChar == '.' || aChar == '-') {
+        return true;
+    }
+    return false;
+}
+#endif
+
 /* Extract URI-Scheme if possible */
 nsresult
 net_ExtractURLScheme(const nsACString &inURI,
-                     uint32_t *startPos, 
-                     uint32_t *endPos,
-                     nsACString *scheme)
+                     nsACString& scheme)
 {
-    // search for something up to a colon, and call it the scheme
-    const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
-    const char* uri_start = flatURI.get();
-    const char* uri = uri_start;
+#if defined(MOZILLA_XPCOMRT_API)
+    NS_WARNING("net_ExtractURLScheme not implemented");
+    return NS_ERROR_NOT_IMPLEMENTED;
+#else
+    Tokenizer p(inURI, "\r\n\t");

-    if (!uri)
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
+    }
+
+    p.Record();
+    if (!p.CheckChar(isAsciiAlpha)) {
+        // First char must be alpha
        return NS_ERROR_MALFORMED_URI;
-
-    // skip leading white space
-    while (nsCRT::IsAsciiSpace(*uri))
-        uri++;
-
-    uint32_t start = uri - uri_start;
-    if (startPos) {
-        *startPos = start;
    }

-    uint32_t length = 0;
-    char c;
-    while ((c = *uri++) != '\0') {
-        // First char must be Alpha
-        if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
-            length++;
-        } 
-        // Next chars can be alpha + digit + some special chars
-        else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
-                 nsCRT::IsAsciiDigit(c) || c == '+' || 
-                 c == '.' || c == '-')) {
-            length++;
-        }
-        // stop if colon reached but not as first char
-        else if (c == ':' && length > 0) {
-            if (endPos) {
-                *endPos = start + length;
-            }
-
-            if (scheme)
-                scheme->Assign(Substring(inURI, start, length));
-            return NS_OK;
-        }
-        else 
-            break;
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
    }
-    return NS_ERROR_MALFORMED_URI;
+
+    if (!p.CheckChar(':')) {
+        return NS_ERROR_MALFORMED_URI;
+    }
+
+    p.Claim(scheme);
+    scheme.StripChars("\r\n\t");
+    return NS_OK;
+#endif
 }

 bool
@ -556,87 +555,73 @@ net_IsValidScheme(const char *scheme, uint32_t schemeLen)
    return true;
 }

+bool
+net_IsAbsoluteURL(const nsACString& uri)
+{
+#if !defined(MOZILLA_XPCOMRT_API)
+    Tokenizer p(uri, "\r\n\t");
+
+    while (p.CheckWhite() || p.CheckChar(' ')) {
+        // Skip leading whitespace
+    }
+
+    // First char must be alpha
+    if (!p.CheckChar(isAsciiAlpha)) {
+        return false;
+    }
+
+    while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
+        // Skip valid scheme characters or \r\n\t
+    }
+    if (!p.CheckChar(':')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (!p.CheckChar('/')) {
+        return false;
+    }
+    p.SkipWhites();
+
+    if (p.CheckChar('/')) {
+        // aSpec is really absolute. Ignore aBaseURI in this case
+        return true;
+    }
+#endif
+    return false;
+}
+
 bool
 net_FilterURIString(const char *str, nsACString& result)
 {
    NS_PRECONDITION(str, "Must have a non-null string!");
-    bool writing = false;
    result.Truncate();
    const char *p = str;

-    // Remove leading spaces, tabs, CR, LF if any.
-    while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
-        writing = true;
-        str = p + 1;
-        p++;
-    }
-
-    // Don't strip from the scheme, because other code assumes everything
-    // up to the ':' is the scheme, and it's bad not to have it match.
-    // If there's no ':', strip.
-    bool found_colon = false;
-    const char *first = nullptr;
+    // Figure out if we need to filter anything.
+    bool writing = false;
    while (*p) {
-        switch (*p) {
-            case '\t': 
-            case '\r': 
-            case '\n':
-                if (found_colon) {
-                    writing = true;
-                    // append chars up to but not including *p
-                    if (p > str)
-                        result.Append(str, p - str);
-                    str = p + 1;
-                } else {
-                    // remember where the first \t\r\n was in case we find no scheme
-                    if (!first)
-                        first = p;
-                }
-                break;
-
-            case ':':
-                found_colon = true;
-                break;
-
-            case '/':
-            case '@':
-                if (!found_colon) {
-                    // colon also has to precede / or @ to be a scheme
-                    found_colon = true; // not really, but means ok to strip
-                    if (first) {
-                        // go back and replace
-                        p = first;
-                        continue; // process *p again
-                    }
-                }
-                break;
-
-            default:
-                break;
+        if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
+            writing = true;
+            break;
        }
        p++;
-
-        // At end, if there was no scheme, and we hit a control char, fix
-        // it up now.
-        if (!*p && first != nullptr && !found_colon) {
-            // TRICKY - to avoid duplicating code, we reset the loop back
-            // to the point we found something to do
-            p = first;
-            // This also stops us from looping after we finish
-            found_colon = true; // so we'll replace \t\r\n
-        }
    }

-    // Remove trailing spaces if any
-    while (((p-1) >= str) && (*(p-1) == ' ')) {
-        writing = true;
-        p--;
+    if (!writing) {
+        // Nothing to strip or filter
+        return false;
    }

-    if (writing && p > str)
-        result.Append(str, p - str);
+    nsAutoCString temp;

-    return writing;
+    temp.Assign(str);
+    temp.Trim("\r\n\t ");
+    temp.StripChars("\r\n\t");
+
+    result.Assign(temp);
+
+    return true;
 }

 #if defined(XP_WIN)
--- a/netwerk/base/nsURLHelper.h
+++ b/netwerk/base/nsURLHelper.h
@ -79,18 +79,22 @@ nsresult net_ResolveRelativePath(const nsACString &relativePath,
                                             const nsACString &basePath,
                                             nsACString &result);

+/**
+ * Check if a URL is absolute
+ *
+ * @param inURL     URL spec
+ * @return true if the given spec represents an absolute URL
+ */
+bool net_IsAbsoluteURL(const nsACString& inURL);
+
 /**
 * Extract URI-Scheme if possible
 *
 * @param inURI     URI spec
- * @param startPos  start of scheme (may be null)
- * @param endPos    end of scheme; index of colon (may be null)
 * @param scheme    scheme copied to this buffer on return (may be null)
 */
 nsresult net_ExtractURLScheme(const nsACString &inURI,
-                                          uint32_t *startPos, 
-                                          uint32_t *endPos,
-                                          nsACString *scheme = nullptr);
+                              nsACString &scheme);

 /* check that the given scheme conforms to RFC 2396 */
 bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
@ -109,8 +113,7 @@ inline bool net_IsValidScheme(const nsAFlatCString &scheme)
 * This function strips out all whitespace at the beginning and end of the URL
 * and strips out \r, \n, \t from the middle of the URL.  This makes it safe to
 * call on things like javascript: urls or data: urls, where we may in fact run
- * into whitespace that is not properly encoded.  Note that stripping does not
- * occur in the scheme portion itself.
+ * into whitespace that is not properly encoded.
 *
 * @param str the pointer to the string to filter.  Must be non-null.
 * @param result the out param to write to if filtering happens
--- a/netwerk/protocol/http/Http2Stream.cpp
+++ b/netwerk/protocol/http/Http2Stream.cpp
@ -344,7 +344,7 @@ nsresult
 Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
 {
  nsAutoCString scheme;
-  nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
+  nsresult rv = net_ExtractURLScheme(origin, scheme);
  NS_ENSURE_SUCCESS(rv, rv);
  return MakeOriginURL(scheme, origin, url);
 }
--- a/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
+++ b/netwerk/protocol/res/SubstitutingProtocolHandler.cpp
@ -54,7 +54,7 @@ SubstitutingURL::EnsureFile()
    return rv;

  nsAutoCString scheme;
-  rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
+  rv = net_ExtractURLScheme(spec, scheme);
  if (NS_FAILED(rv))
    return rv;

--- a/netwerk/streamconv/converters/nsIndexedToHTML.cpp
+++ b/netwerk/streamconv/converters/nsIndexedToHTML.cpp
@ -760,8 +760,10 @@ nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
    // for some protocols, we expect the location to be absolute.
    // if so, and if the location indeed appears to be a valid URI, then go
    // ahead and treat it like one.
+
+    nsAutoCString scheme;
    if (mExpectAbsLoc &&
-        NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
+        NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
        // escape as absolute 
        escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
    }
--- a/netwerk/test/unit/test_standardurl.js
+++ b/netwerk/test/unit/test_standardurl.js
@ -300,3 +300,9 @@ add_test(function test_hugeStringThrows()

  run_next_test();
 });
+
+add_test(function test_filterWhitespace()
+{
+  var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
+  do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
+});