Bug 241788 - net_FilterURIString should filter \r\n\t from the entire URL r=honzab

* net_ExtractURLScheme now uses mozilla::Tokenizer
* net_FilterURIString also filters characters in the scheme now
* removed startPos and endPos parameters for net_FilterURIString and introduced net_IsAbsoluteURL
This commit is contained in:
Valentin Gosu 2016-02-05 14:45:08 +01:00
Родитель 52d81f715f
Коммит 82a16a09cb
8 изменённых файлов: 121 добавлений и 137 удалений

Просмотреть файл

@ -564,7 +564,7 @@ nsIOService::GetProtocolHandler(const char* scheme, nsIProtocolHandler* *result)
NS_IMETHODIMP
nsIOService::ExtractScheme(const nsACString &inURI, nsACString &scheme)
{
return net_ExtractURLScheme(inURI, nullptr, nullptr, &scheme);
return net_ExtractURLScheme(inURI, scheme);
}
NS_IMETHODIMP

Просмотреть файл

@ -2867,20 +2867,8 @@ nsStandardURL::Init(uint32_t urlType,
mOriginCharset = charset;
}
if (baseURI) {
uint32_t start, end;
// pull out the scheme and where it ends
nsresult rv = net_ExtractURLScheme(spec, &start, &end, nullptr);
if (NS_SUCCEEDED(rv) && spec.Length() > end+2) {
nsACString::const_iterator slash;
spec.BeginReading(slash);
slash.advance(end+1);
// then check if // follows
// if it follows, aSpec is really absolute ...
// ignore aBaseURI in this case
if (*slash == '/' && *(++slash) == '/')
baseURI = nullptr;
}
if (baseURI && net_IsAbsoluteURL(spec)) {
baseURI = nullptr;
}
if (!baseURI)

Просмотреть файл

@ -14,6 +14,7 @@
#include "nsNetCID.h"
#include "mozilla/Preferences.h"
#include "prnetdb.h"
#include "mozilla/Tokenizer.h"
using namespace mozilla;
@ -179,12 +180,12 @@ net_ParseFileURL(const nsACString &inURL,
const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
const char *url = flatURL.get();
uint32_t schemeBeg, schemeEnd;
rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
nsAutoCString scheme;
rv = net_ExtractURLScheme(flatURL, scheme);
if (NS_FAILED(rv)) return rv;
if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
if (!scheme.EqualsLiteral("file")) {
NS_ERROR("must be a file:// url");
return NS_ERROR_UNEXPECTED;
}
@ -483,57 +484,55 @@ net_ResolveRelativePath(const nsACString &relativePath,
// scheme fu
//----------------------------------------------------------------------------
#if !defined(MOZILLA_XPCOMRT_API)
static bool isAsciiAlpha(char c) {
return nsCRT::IsAsciiAlpha(c);
}
static bool
net_IsValidSchemeChar(const char aChar)
{
if (nsCRT::IsAsciiAlpha(aChar) || nsCRT::IsAsciiDigit(aChar) ||
aChar == '+' || aChar == '.' || aChar == '-') {
return true;
}
return false;
}
#endif
/* Extract URI-Scheme if possible */
nsresult
net_ExtractURLScheme(const nsACString &inURI,
uint32_t *startPos,
uint32_t *endPos,
nsACString *scheme)
nsACString& scheme)
{
// search for something up to a colon, and call it the scheme
const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
const char* uri_start = flatURI.get();
const char* uri = uri_start;
#if defined(MOZILLA_XPCOMRT_API)
NS_WARNING("net_ExtractURLScheme not implemented");
return NS_ERROR_NOT_IMPLEMENTED;
#else
Tokenizer p(inURI, "\r\n\t");
if (!uri)
while (p.CheckWhite() || p.CheckChar(' ')) {
// Skip leading whitespace
}
p.Record();
if (!p.CheckChar(isAsciiAlpha)) {
// First char must be alpha
return NS_ERROR_MALFORMED_URI;
// skip leading white space
while (nsCRT::IsAsciiSpace(*uri))
uri++;
uint32_t start = uri - uri_start;
if (startPos) {
*startPos = start;
}
uint32_t length = 0;
char c;
while ((c = *uri++) != '\0') {
// First char must be Alpha
if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
length++;
}
// Next chars can be alpha + digit + some special chars
else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||
nsCRT::IsAsciiDigit(c) || c == '+' ||
c == '.' || c == '-')) {
length++;
}
// stop if colon reached but not as first char
else if (c == ':' && length > 0) {
if (endPos) {
*endPos = start + length;
}
if (scheme)
scheme->Assign(Substring(inURI, start, length));
return NS_OK;
}
else
break;
while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
// Skip valid scheme characters or \r\n\t
}
return NS_ERROR_MALFORMED_URI;
if (!p.CheckChar(':')) {
return NS_ERROR_MALFORMED_URI;
}
p.Claim(scheme);
scheme.StripChars("\r\n\t");
return NS_OK;
#endif
}
bool
@ -556,87 +555,73 @@ net_IsValidScheme(const char *scheme, uint32_t schemeLen)
return true;
}
bool
net_IsAbsoluteURL(const nsACString& uri)
{
#if !defined(MOZILLA_XPCOMRT_API)
Tokenizer p(uri, "\r\n\t");
while (p.CheckWhite() || p.CheckChar(' ')) {
// Skip leading whitespace
}
// First char must be alpha
if (!p.CheckChar(isAsciiAlpha)) {
return false;
}
while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
// Skip valid scheme characters or \r\n\t
}
if (!p.CheckChar(':')) {
return false;
}
p.SkipWhites();
if (!p.CheckChar('/')) {
return false;
}
p.SkipWhites();
if (p.CheckChar('/')) {
// aSpec is really absolute. Ignore aBaseURI in this case
return true;
}
#endif
return false;
}
bool
net_FilterURIString(const char *str, nsACString& result)
{
NS_PRECONDITION(str, "Must have a non-null string!");
bool writing = false;
result.Truncate();
const char *p = str;
// Remove leading spaces, tabs, CR, LF if any.
while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
writing = true;
str = p + 1;
p++;
}
// Don't strip from the scheme, because other code assumes everything
// up to the ':' is the scheme, and it's bad not to have it match.
// If there's no ':', strip.
bool found_colon = false;
const char *first = nullptr;
// Figure out if we need to filter anything.
bool writing = false;
while (*p) {
switch (*p) {
case '\t':
case '\r':
case '\n':
if (found_colon) {
writing = true;
// append chars up to but not including *p
if (p > str)
result.Append(str, p - str);
str = p + 1;
} else {
// remember where the first \t\r\n was in case we find no scheme
if (!first)
first = p;
}
break;
case ':':
found_colon = true;
break;
case '/':
case '@':
if (!found_colon) {
// colon also has to precede / or @ to be a scheme
found_colon = true; // not really, but means ok to strip
if (first) {
// go back and replace
p = first;
continue; // process *p again
}
}
break;
default:
break;
if (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
writing = true;
break;
}
p++;
// At end, if there was no scheme, and we hit a control char, fix
// it up now.
if (!*p && first != nullptr && !found_colon) {
// TRICKY - to avoid duplicating code, we reset the loop back
// to the point we found something to do
p = first;
// This also stops us from looping after we finish
found_colon = true; // so we'll replace \t\r\n
}
}
// Remove trailing spaces if any
while (((p-1) >= str) && (*(p-1) == ' ')) {
writing = true;
p--;
if (!writing) {
// Nothing to strip or filter
return false;
}
if (writing && p > str)
result.Append(str, p - str);
nsAutoCString temp;
return writing;
temp.Assign(str);
temp.Trim("\r\n\t ");
temp.StripChars("\r\n\t");
result.Assign(temp);
return true;
}
#if defined(XP_WIN)

Просмотреть файл

@ -79,18 +79,22 @@ nsresult net_ResolveRelativePath(const nsACString &relativePath,
const nsACString &basePath,
nsACString &result);
/**
* Check if a URL is absolute
*
* @param inURL URL spec
* @return true if the given spec represents an absolute URL
*/
bool net_IsAbsoluteURL(const nsACString& inURL);
/**
* Extract URI-Scheme if possible
*
* @param inURI URI spec
* @param startPos start of scheme (may be null)
* @param endPos end of scheme; index of colon (may be null)
* @param scheme scheme copied to this buffer on return (may be null)
*/
nsresult net_ExtractURLScheme(const nsACString &inURI,
uint32_t *startPos,
uint32_t *endPos,
nsACString *scheme = nullptr);
nsACString &scheme);
/* check that the given scheme conforms to RFC 2396 */
bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
@ -109,8 +113,7 @@ inline bool net_IsValidScheme(const nsAFlatCString &scheme)
* This function strips out all whitespace at the beginning and end of the URL
* and strips out \r, \n, \t from the middle of the URL. This makes it safe to
* call on things like javascript: urls or data: urls, where we may in fact run
* into whitespace that is not properly encoded. Note that stripping does not
* occur in the scheme portion itself.
* into whitespace that is not properly encoded.
*
* @param str the pointer to the string to filter. Must be non-null.
* @param result the out param to write to if filtering happens

Просмотреть файл

@ -344,7 +344,7 @@ nsresult
Http2Stream::MakeOriginURL(const nsACString &origin, RefPtr<nsStandardURL> &url)
{
nsAutoCString scheme;
nsresult rv = net_ExtractURLScheme(origin, nullptr, nullptr, &scheme);
nsresult rv = net_ExtractURLScheme(origin, scheme);
NS_ENSURE_SUCCESS(rv, rv);
return MakeOriginURL(scheme, origin, url);
}

Просмотреть файл

@ -54,7 +54,7 @@ SubstitutingURL::EnsureFile()
return rv;
nsAutoCString scheme;
rv = net_ExtractURLScheme(spec, nullptr, nullptr, &scheme);
rv = net_ExtractURLScheme(spec, scheme);
if (NS_FAILED(rv))
return rv;

Просмотреть файл

@ -760,8 +760,10 @@ nsIndexedToHTML::OnIndexAvailable(nsIRequest *aRequest,
// for some protocols, we expect the location to be absolute.
// if so, and if the location indeed appears to be a valid URI, then go
// ahead and treat it like one.
nsAutoCString scheme;
if (mExpectAbsLoc &&
NS_SUCCEEDED(net_ExtractURLScheme(loc, nullptr, nullptr, nullptr))) {
NS_SUCCEEDED(net_ExtractURLScheme(loc, scheme))) {
// escape as absolute
escFlags = esc_Forced | esc_AlwaysCopy | esc_Minimal;
}

Просмотреть файл

@ -300,3 +300,9 @@ add_test(function test_hugeStringThrows()
run_next_test();
});
add_test(function test_filterWhitespace()
{
var url = stringToURL(" \r\n\th\nt\rt\tp://ex\r\n\tample.com/path\r\n\t/\r\n\tto the/fil\r\n\te.e\r\n\txt?que\r\n\try#ha\r\n\tsh \r\n\t ");
do_check_eq(url.spec, "http://example.com/path/to%20the/file.ext?query#hash");
});