2001-09-26 04:40:45 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 15:12:37 +04:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
1999-09-21 04:30:12 +04:00
|
|
|
#include "nsString.h"
|
|
|
|
#include "nsIUnicodeEncoder.h"
|
|
|
|
#include "nsICharsetConverterManager.h"
|
|
|
|
#include "nsITextToSubURI.h"
|
|
|
|
#include "nsEscape.h"
|
2001-12-08 03:25:28 +03:00
|
|
|
#include "nsTextToSubURI.h"
|
2002-05-15 22:55:21 +04:00
|
|
|
#include "nsCRT.h"
|
2013-09-27 20:45:04 +04:00
|
|
|
#include "nsServiceManagerUtils.h"
|
1999-09-21 04:30:12 +04:00
|
|
|
|
|
|
|
// Class ID used in this file to look up the charset converter manager service.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
|
|
|
|
|
|
|
// Constructor: performs no work of its own.
nsTextToSubURI::nsTextToSubURI() {}
|
|
|
|
// Destructor: performs no work of its own.
nsTextToSubURI::~nsTextToSubURI() {}
|
|
|
|
|
2000-11-17 11:06:12 +03:00
|
|
|
// XPCOM boilerplate: nsISupports implementation for nsTextToSubURI, listing
// nsITextToSubURI as its single interface.
NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)
|
1999-09-21 04:30:12 +04:00
|
|
|
|
|
|
|
/**
 * Encodes |text| with the Unicode encoder obtained for |charset| and returns
 * the result of running nsEscape(..., url_XPAlphas) over the encoded bytes.
 *
 * @param charset  charset name handed to the converter manager to select the
 *                 encoder.
 * @param text     null-terminated UTF-16 input; a null pointer is given a
 *                 length of 0.
 * @param _retval  receives the string returned by nsEscape(); set to null
 *                 whenever the call does not get that far.
 * @return NS_ERROR_NULL_POINTER when |_retval| is null,
 *         NS_ERROR_OUT_OF_MEMORY when nsEscape() returns null, otherwise the
 *         first failure reported by the service lookup or the encoder, or
 *         the result code of the encoder's Convert() call on success.
 */
NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
  const char *charset, const PRUnichar *text, char **_retval)
{
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = nullptr;

  // Get Charset, get the encoder.  Both references are held in nsCOMPtr so
  // that every exit path below releases them.
  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> ccm =
    do_GetService(kCharsetConverterManagerCID, &rv);
  if (NS_FAILED(rv))
    return rv;

  nsCOMPtr<nsIUnicodeEncoder> encoder;
  rv = ccm->GetUnicodeEncoder(charset, getter_AddRefs(encoder));
  if (NS_FAILED(rv))
    return rv;

  // Characters the encoder cannot represent are replaced with '?'.
  rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace,
                                       nullptr, static_cast<PRUnichar>('?'));
  if (NS_FAILED(rv))
    return rv;

  int32_t ulen = text ? NS_strlen(text) : 0;
  int32_t outlen = 0;
  rv = encoder->GetMaxLength(text, ulen, &outlen);
  if (NS_FAILED(rv))
    return rv;

  // Results shorter than 256 bytes are encoded into the stack buffer;
  // anything larger gets heap storage with room for the terminator.
  char stackBuf[256];
  char *pBuf = stackBuf;
  if (outlen >= 256) {
    pBuf = static_cast<char*>(NS_Alloc(outlen + 1));
    if (!pBuf) {
      // Allocation failed: fall back to the stack buffer and cap the
      // output length so the terminator still fits.
      outlen = 255;
      pBuf = stackBuf;
    }
  }

  // Convert() overwrites outlen with the number of bytes written, so keep
  // the capacity around to know how much room is left for Finish().
  const int32_t bufLen = outlen;
  rv = encoder->Convert(text, &ulen, pBuf, &outlen);
  if (NS_SUCCEEDED(rv)) {
    // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
    int32_t finLen = bufLen - outlen;
    if (finLen > 0 &&
        NS_SUCCEEDED(encoder->Finish(pBuf + outlen, &finLen))) {
      outlen += finLen;
    }
    pBuf[outlen] = '\0';

    *_retval = nsEscape(pBuf, url_XPAlphas);
    if (!*_retval)
      rv = NS_ERROR_OUT_OF_MEMORY;
  }

  if (pBuf != stackBuf)
    NS_Free(pBuf);

  return rv;
}
|
1999-09-21 09:13:53 +04:00
|
|
|
|
2000-02-04 02:18:07 +03:00
|
|
|
/**
 * Unescapes |text| with nsUnescape() and decodes the resulting bytes to
 * UTF-16 with the Unicode decoder obtained for |charset|.
 *
 * @param charset  charset name handed to the converter manager to select the
 *                 decoder.
 * @param text     escaped input; a null pointer is treated as the empty
 *                 string.
 * @param _retval  receives a zero-terminated buffer obtained from NS_Alloc()
 *                 on success, null otherwise.
 * @return NS_ERROR_NULL_POINTER when |_retval| is null,
 *         NS_ERROR_OUT_OF_MEMORY when an allocation fails, otherwise the
 *         first failure reported by the service lookup or the decoder, or
 *         the result code of the decoder's Convert() call on success.
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
  const char *charset, const char *text, PRUnichar **_retval)
{
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  if (!text) {
    // set empty string instead of returning error
    // due to compatibility for old version
    text = "";
  }
  *_retval = nullptr;

  // unescape the string, unescape changes the input
  char *unescaped = NS_strdup(text);
  if (!unescaped)
    return NS_ERROR_OUT_OF_MEMORY;
  unescaped = nsUnescape(unescaped);
  NS_ASSERTION(unescaped, "nsUnescape returned null");

  // Convert from the charset to unicode.  The decoder is held in an
  // nsCOMPtr, like the manager, so no manual release is needed.
  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> ccm =
    do_GetService(kCharsetConverterManagerCID, &rv);

  nsCOMPtr<nsIUnicodeDecoder> decoder;
  if (NS_SUCCEEDED(rv))
    rv = ccm->GetUnicodeDecoder(charset, getter_AddRefs(decoder));

  if (NS_SUCCEEDED(rv)) {
    int32_t len = strlen(unescaped);
    int32_t outlen = 0;
    rv = decoder->GetMaxLength(unescaped, len, &outlen);
    if (NS_SUCCEEDED(rv)) {
      // One extra code unit for the terminating zero.
      PRUnichar *pBuf = static_cast<PRUnichar*>(
        NS_Alloc((outlen + 1) * sizeof(PRUnichar)));
      if (!pBuf) {
        rv = NS_ERROR_OUT_OF_MEMORY;
      } else {
        rv = decoder->Convert(unescaped, &len, pBuf, &outlen);
        if (NS_SUCCEEDED(rv)) {
          pBuf[outlen] = 0;
          *_retval = pBuf;
        } else {
          NS_Free(pBuf);
        }
      }
    }
  }

  // Single exit point so the unescaped copy is always freed.
  NS_Free(unescaped);

  return rv;
}
|
|
|
|
|
2011-09-29 10:19:26 +04:00
|
|
|
static bool statefulCharset(const char *charset)
|
2002-09-08 19:08:21 +04:00
|
|
|
{
|
|
|
|
if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
|
|
|
|
!nsCRT::strcasecmp(charset, "UTF-7") ||
|
|
|
|
!nsCRT::strcasecmp(charset, "HZ-GB-2312"))
|
2011-10-17 18:59:28 +04:00
|
|
|
return true;
|
2002-09-08 19:08:21 +04:00
|
|
|
|
2011-10-17 18:59:28 +04:00
|
|
|
return false;
|
2002-09-08 19:08:21 +04:00
|
|
|
}
|
|
|
|
|
2002-08-12 23:23:22 +04:00
|
|
|
/**
 * Converts the bytes of |aURI| to UTF-16 and stores them in |_retval|.
 *
 * For charsets that statefulCharset() does not flag, pure-ASCII input is
 * copied directly, and when |aIRI| is true valid UTF-8 input is copied as
 * UTF-8.  Everything else is run through the Unicode decoder obtained for
 * |aCharset|.
 *
 * @return NS_OK for the two shortcut paths, NS_ERROR_INVALID_ARG when
 *         |aCharset| is empty and a decoder would be needed,
 *         NS_ERROR_OUT_OF_MEMORY when the scratch buffer cannot be
 *         allocated, otherwise the result of the service lookup or decoder
 *         calls.
 */
nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
                                             const nsAFlatCString &aURI,
                                             bool aIRI,
                                             nsAString &_retval)
{
  // check for 7bit encoding the data may not be ASCII after we decode
  const bool stateful = statefulCharset(aCharset.get());

  if (!stateful) {
    if (IsASCII(aURI)) {
      CopyASCIItoUTF16(aURI, _retval);
      return NS_OK;
    }
    if (aIRI && IsUTF8(aURI)) {
      CopyUTF8toUTF16(aURI, _retval);
      return NS_OK;
    }
  }

  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);

  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIUnicodeDecoder> decoder;
  rv = converterManager->GetUnicodeDecoder(aCharset.get(),
                                           getter_AddRefs(decoder));
  NS_ENSURE_SUCCESS(rv, rv);
  decoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);

  int32_t inputLen = aURI.Length();
  int32_t outputLen;
  rv = decoder->GetMaxLength(aURI.get(), inputLen, &outputLen);
  NS_ENSURE_SUCCESS(rv, rv);

  // Scratch buffer sized from the decoder's reported maximum; Convert()
  // updates outputLen to the number of code units actually written.
  PRUnichar *decoded = (PRUnichar *) NS_Alloc(outputLen * sizeof(PRUnichar));
  NS_ENSURE_TRUE(decoded, NS_ERROR_OUT_OF_MEMORY);

  rv = decoder->Convert(aURI.get(), &inputLen, decoded, &outputLen);
  if (NS_SUCCEEDED(rv)) {
    _retval.Assign(decoded, outputLen);
  }

  NS_Free(decoded);
  return rv;
}
|
|
|
|
|
|
|
|
/**
 * Unescapes |aURIFragment| (leaving control octets escaped) and converts it
 * to UTF-16 using |aCharset|.  If the conversion does not return exactly
 * NS_OK, the original fragment is copied to |_retval| as UTF-8 instead.
 *
 * @return always NS_OK.
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
                                               const nsACString &aURIFragment,
                                               nsAString &_retval)
{
  // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
  nsAutoCString unescaped;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_SkipControl | esc_AlwaysCopy, unescaped);

  // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
  // sequences are also considered failure in this context
  const nsresult convertResult =
    convertURItoUnicode(PromiseFlatCString(aCharset), unescaped, true,
                        _retval);
  if (convertResult != NS_OK) {
    // in case of failure, return escaped URI
    // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
    CopyUTF8toUTF16(aURIFragment, _retval);
  }

  return NS_OK;
}
|
|
|
|
|
2002-09-08 19:08:21 +04:00
|
|
|
/**
 * Unescapes only the non-ASCII escapes of |aURIFragment| and converts the
 * result to UTF-16 using |aCharset|.
 *
 * When the unescaped bytes are not UTF-8 and |aCharset| is one of the listed
 * UTF-16 / UTF-7 variants, the fragment is returned unchanged (copied as
 * ASCII) with NS_OK.
 *
 * @return NS_OK for the pass-through case, otherwise the result of
 *         convertURItoUnicode().
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
                                                  const nsACString & aURIFragment,
                                                  nsAString &_retval)
{
  nsAutoCString unescaped;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_AlwaysCopy | esc_OnlyNonASCII, unescaped);

  // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
  // superset since converting "http:" with such an encoding is always a bad
  // idea.
  if (!IsUTF8(unescaped)) {
    const bool notAsciiSuperset =
      aCharset.LowerCaseEqualsLiteral("utf-16") ||
      aCharset.LowerCaseEqualsLiteral("utf-16be") ||
      aCharset.LowerCaseEqualsLiteral("utf-16le") ||
      aCharset.LowerCaseEqualsLiteral("utf-7") ||
      aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7");
    if (notAsciiSuperset) {
      CopyASCIItoUTF16(aURIFragment, _retval);
      return NS_OK;
    }
  }

  return convertURItoUnicode(PromiseFlatCString(aCharset), unescaped, true,
                             _retval);
}
|
|
|
|
|
1999-10-01 01:11:05 +04:00
|
|
|
//----------------------------------------------------------------------
|