2001-12-22 12:15:51 +03:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2000-01-17 09:05:43 +03:00
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
#ifndef nsURLHelper_h__
|
|
|
|
#define nsURLHelper_h__
|
2000-01-17 09:05:43 +03:00
|
|
|
|
2000-02-25 09:17:57 +03:00
|
|
|
#include <stdint.h>

#include "nsString.h"
|
2000-01-17 09:05:43 +03:00
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
class nsIFile;
|
|
|
|
class nsIURLParser;
|
2002-01-26 01:19:29 +03:00
|
|
|
|
2003-07-16 23:09:00 +04:00
|
|
|
/**
 * Flags accepted by net_CoalesceDirs.  NET_COALESCE_NORMAL is zero; each of
 * the other enumerators occupies its own distinct bit.
 */
enum netCoalesceFlags
{
    /** no special handling requested */
    NET_COALESCE_NORMAL = 0,

    /**
     * retains /../ that reach above dir root (useful for FTP
     * servers in which the root of the FTP URL is not necessarily
     * the root of the FTP filesystem).
     */
    NET_COALESCE_ALLOW_RELATIVE_ROOT = 1<<0,

    /**
     * recognizes /%2F and // as markers for the root directory
     * and handles them properly.
     */
    NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 1<<1
};
|
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
// This module contains some private helper functions related to URL parsing.
|
|
|
|
//----------------------------------------------------------------------------
|
2000-01-17 09:05:43 +03:00
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
/* shutdown frees URL parser */
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_ShutdownURLHelper();
|
2007-10-03 02:26:53 +04:00
|
|
|
#ifdef XP_MACOSX
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_ShutdownURLHelperOSX();
|
2007-10-03 02:26:53 +04:00
|
|
|
#endif
|
2000-01-17 09:05:43 +03:00
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
/* access URL parsers */
|
2014-06-02 16:08:21 +04:00
|
|
|
nsIURLParser * net_GetAuthURLParser();
|
|
|
|
nsIURLParser * net_GetNoAuthURLParser();
|
|
|
|
nsIURLParser * net_GetStdURLParser();
|
2000-05-07 14:43:36 +04:00
|
|
|
|
2009-10-17 03:59:02 +04:00
|
|
|
/* convert between nsIFile and file:// URL spec
|
|
|
|
* net_GetURLSpecFromFile does an extra stat, so callers should
|
|
|
|
* avoid it if possible in favor of net_GetURLSpecFromActualFile
|
|
|
|
* and net_GetURLSpecFromDir */
|
2014-06-02 16:08:21 +04:00
|
|
|
nsresult net_GetURLSpecFromFile(nsIFile *, nsACString &);
|
|
|
|
nsresult net_GetURLSpecFromDir(nsIFile *, nsACString &);
|
|
|
|
nsresult net_GetURLSpecFromActualFile(nsIFile *, nsACString &);
|
|
|
|
nsresult net_GetFileFromURLSpec(const nsACString &, nsIFile **);
|
2001-11-27 02:28:44 +03:00
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
/* extract file path components from file:// URL */
|
2014-06-02 16:08:21 +04:00
|
|
|
nsresult net_ParseFileURL(const nsACString &inURL,
|
2004-06-16 18:53:31 +04:00
|
|
|
nsACString &outDirectory,
|
|
|
|
nsACString &outFileBaseName,
|
|
|
|
nsACString &outFileExtension);
|
2002-09-13 23:32:45 +04:00
|
|
|
|
2004-11-06 04:55:49 +03:00
|
|
|
/* handle .. in dirs while resolving URLs (path is UTF-8) */
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_CoalesceDirs(netCoalesceFlags flags, char* path);
|
2002-03-06 10:48:55 +03:00
|
|
|
|
2002-08-15 22:38:46 +04:00
|
|
|
/**
|
|
|
|
* Resolves a relative path string containing "." and ".."
|
|
|
|
* with respect to a base path (assumed to already be resolved).
|
|
|
|
* For example, resolving "../../foo/./bar/../baz.html" w.r.t.
|
|
|
|
* "/a/b/c/d/e/" yields "/a/b/c/foo/baz.html". Attempting to
|
|
|
|
* ascend above the base results in the NS_ERROR_MALFORMED_URI
|
|
|
|
* exception. If basePath is null, it treats it as "/".
|
|
|
|
*
|
|
|
|
* @param relativePath a relative URI
|
|
|
|
* @param basePath a base URI
|
|
|
|
*
|
|
|
|
* @param result        receives the resolved, canonical URI string
*
* @return an nsresult; NS_ERROR_MALFORMED_URI if relativePath ascends
*         above the base
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
nsresult net_ResolveRelativePath(const nsACString &relativePath,
|
2004-06-16 18:53:31 +04:00
|
|
|
const nsACString &basePath,
|
|
|
|
nsACString &result);
|
2002-09-13 23:32:45 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Extract URI-Scheme if possible
|
|
|
|
*
|
|
|
|
* @param inURI URI spec
|
|
|
|
* @param startPos start of scheme (may be null)
|
|
|
|
* @param endPos end of scheme; index of colon (may be null)
|
|
|
|
* @param scheme scheme copied to this buffer on return (may be null)
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
nsresult net_ExtractURLScheme(const nsACString &inURI,
|
2012-08-22 19:56:38 +04:00
|
|
|
uint32_t *startPos,
|
|
|
|
uint32_t *endPos,
|
2012-07-30 18:20:58 +04:00
|
|
|
nsACString *scheme = nullptr);
|
2002-09-13 23:32:45 +04:00
|
|
|
|
|
|
|
/* check that the given scheme conforms to RFC 2396 */
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_IsValidScheme(const char *scheme, uint32_t schemeLen);
|
2002-09-13 23:32:45 +04:00
|
|
|
|
2011-09-29 10:19:26 +04:00
|
|
|
inline bool net_IsValidScheme(const nsAFlatCString &scheme)
|
2002-09-13 23:32:45 +04:00
|
|
|
{
|
|
|
|
return net_IsValidScheme(scheme.get(), scheme.Length());
|
|
|
|
}
|
|
|
|
|
2003-07-09 00:09:17 +04:00
|
|
|
/**
|
|
|
|
* Filter out whitespace from a URI string. The input is the |str|
|
|
|
|
* pointer. |result| is written to if and only if there is whitespace that has
|
|
|
|
* to be filtered out. The return value is true if and only if |result| is
|
|
|
|
* written to.
|
|
|
|
*
|
|
|
|
* This function strips out all whitespace at the beginning and end of the URL
|
|
|
|
* and strips out \r, \n, \t from the middle of the URL. This makes it safe to
|
|
|
|
* call on things like javascript: urls or data: urls, where we may in fact run
|
2011-07-05 17:09:09 +04:00
|
|
|
* into whitespace that is not properly encoded. Note that stripping does not
|
|
|
|
* occur in the scheme portion itself.
|
2003-07-09 00:09:17 +04:00
|
|
|
*
|
|
|
|
* @param str the pointer to the string to filter. Must be non-null.
|
|
|
|
* @param result the out param to write to if filtering happens
|
|
|
|
* @return whether result was written to
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_FilterURIString(const char *str, nsACString& result);
|
2003-07-09 00:09:17 +04:00
|
|
|
|
2014-02-11 02:57:01 +04:00
|
|
|
#if defined(XP_WIN)
|
2004-08-04 21:08:27 +04:00
|
|
|
/**
|
|
|
|
* On Win32 and OS/2 systems, a back-slash in a file:// URL is equivalent to a
|
|
|
|
* forward-slash. This function maps any back-slashes to forward-slashes.
|
|
|
|
*
|
|
|
|
* @param aURL
|
|
|
|
* The URL string to normalize (UTF-8 encoded). This can be a
|
|
|
|
* relative URL segment.
|
|
|
|
* @param aResultBuf
|
|
|
|
* The resulting string is appended to this string. If the input URL
|
|
|
|
* is already normalized, then aResultBuf is unchanged.
|
|
|
|
*
|
|
|
|
* @returns false if aURL is already normalized. Otherwise, returns true.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_NormalizeFileURL(const nsACString &aURL,
|
2004-08-04 21:08:27 +04:00
|
|
|
nsCString &aResultBuf);
|
|
|
|
#endif
|
|
|
|
|
2003-06-19 03:16:17 +04:00
|
|
|
/*****************************************************************************
|
|
|
|
* generic string routines follow (XXX move to someplace more generic).
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* convert to lower case */
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_ToLowerCase(char* str, uint32_t length);
|
|
|
|
void net_ToLowerCase(char* str);
|
2002-08-15 22:38:46 +04:00
|
|
|
|
2003-06-19 03:16:17 +04:00
|
|
|
/**
|
|
|
|
* returns pointer to first character of |str| in the given set. if not found,
|
|
|
|
* then |end| is returned. stops prematurely if a null byte is encountered,
|
|
|
|
* and returns the address of the null byte.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
char * net_FindCharInSet(const char *str, const char *end, const char *set);
|
2003-06-19 03:16:17 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* returns pointer to first character of |str| NOT in the given set. if all
|
|
|
|
* characters are in the given set, then |end| is returned. if '\0' is not
|
|
|
|
* included in |set|, then stops prematurely if a null byte is encountered,
|
|
|
|
* and returns the address of the null byte.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
char * net_FindCharNotInSet(const char *str, const char *end, const char *set);
|
2003-06-19 03:16:17 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* returns pointer to last character of |str| NOT in the given set. if all
|
|
|
|
* characters are in the given set, then |str - 1| is returned.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
char * net_RFindCharNotInSet(const char *str, const char *end, const char *set);
|
2003-06-19 03:16:17 +04:00
|
|
|
|
2015-10-01 21:36:19 +03:00
|
|
|
/**
|
|
|
|
* Parses a content-type header and returns the content type and
|
|
|
|
* charset (if any).  aContentCharset is not modified if no charset is
|
|
|
|
* specified anywhere in aHeaderStr.  In that case (no charset
|
|
|
|
* specified), aHadCharset is set to false. Otherwise, it's set to
|
|
|
|
* true. Note that aContentCharset can be empty even if aHadCharset
|
|
|
|
* is true.
|
|
|
|
*
|
|
|
|
* This parsing is suitable for HTTP requests.  Use net_ParseContentType
|
|
|
|
* for parsing this header in HTTP responses.
|
|
|
|
*/
|
|
|
|
void net_ParseRequestContentType(const nsACString &aHeaderStr,
|
|
|
|
nsACString &aContentType,
|
|
|
|
nsACString &aContentCharset,
|
|
|
|
bool* aHadCharset);
|
|
|
|
|
2005-07-26 00:27:04 +04:00
|
|
|
/**
|
|
|
|
* Parses a content-type header and returns the content type and
|
|
|
|
* charset (if any).  aContentCharset is not modified if no charset is
|
|
|
|
* specified anywhere in aHeaderStr.  In that case (no charset
|
|
|
|
* specified), aHadCharset is set to false. Otherwise, it's set to
|
|
|
|
* true. Note that aContentCharset can be empty even if aHadCharset
|
|
|
|
* is true.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_ParseContentType(const nsACString &aHeaderStr,
|
2015-10-01 21:36:19 +03:00
|
|
|
nsACString &aContentType,
|
|
|
|
nsACString &aContentCharset,
|
|
|
|
bool* aHadCharset);
|
2007-12-04 00:34:44 +03:00
|
|
|
/**
|
|
|
|
* As above, but also returns the start and end indexes for the charset
|
|
|
|
* parameter in aHeaderStr. These are indices for the entire parameter, NOT
|
|
|
|
* just the value. If there is "effectively" no charset parameter (e.g. if an
|
|
|
|
* earlier type with one is overridden by a later type without one),
|
|
|
|
* *aHadCharset will be true but *aCharsetStart will be set to -1. Note that
|
|
|
|
* it's possible to have aContentCharset empty and *aHadCharset true when
|
|
|
|
* *aCharsetStart is nonnegative; this corresponds to charset="".
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
void net_ParseContentType(const nsACString &aHeaderStr,
|
2015-10-01 21:36:19 +03:00
|
|
|
nsACString &aContentType,
|
|
|
|
nsACString &aContentCharset,
|
|
|
|
bool *aHadCharset,
|
|
|
|
int32_t *aCharsetStart,
|
|
|
|
int32_t *aCharsetEnd);
|
2005-07-26 00:27:04 +04:00
|
|
|
|
2003-06-19 03:16:17 +04:00
|
|
|
/* inline versions */
|
|
|
|
|
|
|
|
/*
 * Highest representable address, passed as the |end| bound by the inline
 * search helpers that take no explicit end pointer.  It is built from
 * UINTPTR_MAX so that it covers the full pointer width on 64-bit platforms
 * and does not rely on arithmetic on a null pointer, which is undefined
 * behavior.
 */
#define NET_MAX_ADDRESS ((char*)UINTPTR_MAX)
|
|
|
|
|
|
|
|
inline char *net_FindCharInSet(const char *str, const char *set)
|
|
|
|
{
|
|
|
|
return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
|
|
|
|
}
|
|
|
|
inline char *net_FindCharNotInSet(const char *str, const char *set)
|
|
|
|
{
|
|
|
|
return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
|
|
|
|
}
|
|
|
|
inline char *net_RFindCharNotInSet(const char *str, const char *set)
|
|
|
|
{
|
|
|
|
return net_RFindCharNotInSet(str, str + strlen(str), set);
|
|
|
|
}
|
|
|
|
|
2005-09-23 01:38:12 +04:00
|
|
|
/**
|
|
|
|
* This function returns true if the given hostname does not include any
|
|
|
|
* restricted characters. Otherwise, false is returned.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_IsValidHostName(const nsCSubstring &host);
|
2005-09-23 01:38:12 +04:00
|
|
|
|
2011-06-23 19:04:23 +04:00
|
|
|
/**
|
|
|
|
* Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_IsValidIPv4Addr(const char *addr, int32_t addrLen);
|
2011-06-23 19:04:23 +04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
|
|
|
|
*/
|
2014-06-02 16:08:21 +04:00
|
|
|
bool net_IsValidIPv6Addr(const char *addr, int32_t addrLen);
|
2011-06-23 19:04:23 +04:00
|
|
|
|
2015-03-11 07:08:27 +03:00
|
|
|
|
|
|
|
/**
|
2015-04-23 00:10:43 +03:00
|
|
|
* Returns the max length of a URL. The default is 1048576 (1 MB).
|
2015-03-11 07:08:27 +03:00
|
|
|
* Can be changed by pref "network.standard-url.max-length"
|
|
|
|
*/
|
|
|
|
int32_t net_GetURLMaxLength();
|
|
|
|
|
2002-09-13 23:32:45 +04:00
|
|
|
#endif // !nsURLHelper_h__
|