2001-09-26 04:40:45 +04:00
|
|
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
1999-09-21 04:30:12 +04:00
|
|
|
*
|
2001-09-26 04:40:45 +04:00
|
|
|
* The contents of this file are subject to the Netscape Public License
|
|
|
|
* Version 1.1 (the "License"); you may not use this file except in
|
|
|
|
* compliance with the License. You may obtain a copy of the License at
|
|
|
|
* http://www.mozilla.org/NPL/
|
1999-09-21 04:30:12 +04:00
|
|
|
*
|
2001-09-26 04:40:45 +04:00
|
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
|
|
* for the specific language governing rights and limitations under the
|
|
|
|
* License.
|
1999-09-21 04:30:12 +04:00
|
|
|
*
|
1999-11-06 06:43:54 +03:00
|
|
|
* The Original Code is mozilla.org code.
|
|
|
|
*
|
2001-09-26 04:40:45 +04:00
|
|
|
* The Initial Developer of the Original Code is
|
|
|
|
* Netscape Communications Corporation.
|
|
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
|
|
* the Initial Developer. All Rights Reserved.
|
1999-11-06 06:43:54 +03:00
|
|
|
*
|
2001-09-26 04:40:45 +04:00
|
|
|
* Contributor(s):
|
2000-02-03 01:24:56 +03:00
|
|
|
* Pierre Phaneuf <pp@ludusdesign.com>
|
2001-09-26 04:40:45 +04:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
|
|
* use your version of this file under the terms of the NPL, indicate your
|
|
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
|
|
* the provisions above, a recipient may use your version of this file under
|
|
|
|
* the terms of any one of the NPL, the GPL or the LGPL.
|
|
|
|
*
|
|
|
|
* ***** END LICENSE BLOCK ***** */
|
1999-09-21 04:30:12 +04:00
|
|
|
#include "nsString.h"
|
|
|
|
#include "nsIUnicodeEncoder.h"
|
|
|
|
#include "nsICharsetConverterManager.h"
|
2002-08-12 23:23:22 +04:00
|
|
|
#include "nsICharsetConverterManager2.h"
|
|
|
|
#include "nsReadableUtils.h"
|
1999-09-21 04:30:12 +04:00
|
|
|
#include "nsITextToSubURI.h"
|
|
|
|
#include "nsIServiceManager.h"
|
|
|
|
#include "nsUConvDll.h"
|
|
|
|
#include "nsEscape.h"
|
|
|
|
#include "prmem.h"
|
2001-12-08 03:25:28 +03:00
|
|
|
#include "nsTextToSubURI.h"
|
2002-05-15 22:55:21 +04:00
|
|
|
#include "nsCRT.h"
|
1999-09-21 04:30:12 +04:00
|
|
|
|
|
|
|
// Static ID constant initialized from NS_ITEXTTOSUBURI_IID.
// NOTE(review): the value is an interface ID but is declared with
// NS_DEFINE_CID, and it is not referenced in the visible part of this file --
// confirm whether it is still needed.
static NS_DEFINE_CID(kITextToSubURIIID, NS_ITEXTTOSUBURI_IID);
|
|
|
|
// Class ID passed to the service lookups below to obtain the charset
// converter manager.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
|
|
|
|
|
|
|
// Constructor; the body performs no work.
nsTextToSubURI::nsTextToSubURI() {}
|
|
|
|
// Destructor; the body performs no work.
nsTextToSubURI::~nsTextToSubURI() {}
|
|
|
|
|
2000-11-17 11:06:12 +03:00
|
|
|
// XPCOM boilerplate macro: supplies the nsISupports implementation for
// nsTextToSubURI with nsITextToSubURI as its single interface.
NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)
|
1999-09-21 04:30:12 +04:00
|
|
|
|
|
|
|
/**
 * Encodes a Unicode string into the given charset and URL-escapes the
 * encoded bytes.
 *
 * @param charset  name of the target charset, used to look up the encoder
 * @param text     zero-terminated Unicode text to encode
 * @param _retval  [out] set to nsnull on entry; on success receives the
 *                 string returned by nsEscape(..., url_XPAlphas)
 * @return NS_ERROR_NULL_POINTER if _retval is null,
 *         NS_ERROR_OUT_OF_MEMORY if nsEscape() returns null,
 *         otherwise the status of the last service / encoder call made.
 */
NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
  const char *charset, const PRUnichar *text, char **_retval)
{
  if (nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = nsnull;

  nsresult rv = NS_OK;

  // Get the converter manager through an owning pointer, the same way the
  // other methods in this file do, so it is released on every exit path.
  nsCOMPtr<nsICharsetConverterManager> ccm =
    do_GetService(kCharsetConverterManagerCID, &rv);
  if (NS_FAILED(rv) || !ccm)
    return rv;

  // Get Charset, get the encoder.
  nsAutoString charsetStr;
  charsetStr.AssignWithConversion(charset);

  nsCOMPtr<nsIUnicodeEncoder> encoder;
  rv = ccm->GetUnicodeEncoder(&charsetStr, getter_AddRefs(encoder));
  if (NS_FAILED(rv))
    return rv;

  // Characters the charset cannot represent are replaced with '?'.
  rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, (PRUnichar)'?');
  if (NS_FAILED(rv))
    return rv;

  // Small results are built in a stack buffer; larger ones on the heap.
  char buf[256];
  char *pBuf = buf;
  PRInt32 ulen = nsCRT::strlen(text);
  PRInt32 outlen = 0;

  rv = encoder->GetMaxLength(text, ulen, &outlen);
  if (NS_SUCCEEDED(rv)) {
    if (outlen >= 256) {
      pBuf = (char*)PR_Malloc(outlen+1);
      if (nsnull == pBuf) {
        // Allocation failed: fall back to the stack buffer and cap the
        // output at 255 bytes, leaving room for the terminator.
        outlen = 255;
        pBuf = buf;
      }
    }

    // Remember the capacity; Convert() overwrites outlen with the number of
    // bytes it actually produced.
    PRInt32 bufLen = outlen;
    rv = encoder->Convert(text, &ulen, pBuf, &outlen);
    if (NS_SUCCEEDED(rv)) {
      // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
      PRInt32 finLen = bufLen - outlen;
      if (finLen > 0) {
        if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
          outlen += finLen;
      }
      pBuf[outlen] = '\0';

      *_retval = nsEscape(pBuf, url_XPAlphas);
      if (nsnull == *_retval)
        rv = NS_ERROR_OUT_OF_MEMORY;
    }
  }

  if (pBuf != buf)
    PR_Free(pBuf);

  return rv;
}
|
1999-09-21 09:13:53 +04:00
|
|
|
|
2000-02-04 02:18:07 +03:00
|
|
|
/**
 * Unescapes a URL-escaped string and decodes the resulting bytes from the
 * given charset into Unicode.
 *
 * @param charset  name of the charset the unescaped bytes are encoded in
 * @param text     escaped input string; it is duplicated first because
 *                 nsUnescape() modifies its argument
 * @param _retval  [out] set to nsnull on entry; on success receives a
 *                 zero-terminated buffer allocated with PR_Malloc
 * @return NS_ERROR_NULL_POINTER if _retval is null,
 *         NS_ERROR_OUT_OF_MEMORY if an allocation fails,
 *         otherwise the status of the last service / decoder call made.
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
  const char *charset, const char *text, PRUnichar **_retval)
{
  if (nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = nsnull;
  nsresult rv = NS_OK;

  // unescape the string, unescape changes the input
  char *unescaped = nsCRT::strdup((char *) text);
  if (nsnull == unescaped)
    return NS_ERROR_OUT_OF_MEMORY;
  unescaped = nsUnescape(unescaped);
  NS_ASSERTION(unescaped, "nsUnescape returned null");

  // Convert from the charset to unicode
  nsCOMPtr<nsICharsetConverterManager> ccm =
    do_GetService(kCharsetConverterManagerCID, &rv);
  if (NS_SUCCEEDED(rv)) {
    nsAutoString charsetStr;
    charsetStr.AssignWithConversion(charset);

    nsCOMPtr<nsIUnicodeDecoder> decoder;
    rv = ccm->GetUnicodeDecoder(&charsetStr, getter_AddRefs(decoder));
    if (NS_SUCCEEDED(rv)) {
      PRInt32 len = strlen(unescaped);
      PRInt32 outlen = 0;
      rv = decoder->GetMaxLength(unescaped, len, &outlen);
      if (NS_SUCCEEDED(rv)) {
        // One extra element for the terminator. Size by the character type,
        // not by a pointer to it.
        PRUnichar *pBuf =
          (PRUnichar *) PR_Malloc((outlen+1)*sizeof(PRUnichar));
        if (nsnull == pBuf) {
          rv = NS_ERROR_OUT_OF_MEMORY;
        }
        else {
          rv = decoder->Convert(unescaped, &len, pBuf, &outlen);
          if (NS_SUCCEEDED(rv)) {
            pBuf[outlen] = 0;
            *_retval = pBuf;  // ownership passes to the caller
          }
          else {
            // Nothing is handed back on failure, so release the buffer here.
            PR_Free(pBuf);
          }
        }
      }
    }
  }
  PR_FREEIF(unescaped);

  return rv;
}
|
|
|
|
|
2002-09-08 19:08:21 +04:00
|
|
|
// Returns PR_TRUE when |charset| (compared case-insensitively) starts with
// "ISO-2022-" or is exactly "UTF-7" or "HZ-GB-2312"; PR_FALSE otherwise.
static PRBool statefulCharset(const char *charset)
{
  if (0 == nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1))
    return PR_TRUE;

  if (0 == nsCRT::strcasecmp(charset, "UTF-7"))
    return PR_TRUE;

  if (0 == nsCRT::strcasecmp(charset, "HZ-GB-2312"))
    return PR_TRUE;

  return PR_FALSE;
}
|
|
|
|
|
2002-08-12 23:23:22 +04:00
|
|
|
/**
 * Converts an already-unescaped URI string to Unicode.
 *
 * For charsets that statefulCharset() does not flag, two shortcuts apply:
 * pure-ASCII input is widened directly, and, when aIRI is true, input that
 * is valid UTF-8 is decoded as UTF-8. Otherwise the decoder for aCharset is
 * looked up and used.
 *
 * @param aCharset  charset name; must be non-empty if no shortcut applies
 * @param aURI      the unescaped URI bytes
 * @param aIRI      when true, try interpreting aURI as UTF-8 first
 * @param _retval   [out] receives the converted string on success
 * @return NS_OK on success, NS_ERROR_INVALID_ARG for an empty charset,
 *         NS_ERROR_OUT_OF_MEMORY if the buffer cannot be allocated, or the
 *         failure code of the service / decoder call that failed.
 */
nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
                                             const nsAFlatCString &aURI,
                                             PRBool aIRI,
                                             nsAString &_retval)
{
  nsresult rv = NS_OK;

  // For the 7-bit stateful encodings the bytes can look like ASCII yet decode
  // to non-ASCII text, so the shortcuts below must be skipped for them.
  PRBool isStatefulCharset = statefulCharset(aCharset.get());

  if (!isStatefulCharset) {
    if (IsASCII(aURI)) {
      _retval.Assign(NS_ConvertASCIItoUCS2(aURI));
      return rv;
    }

    if (aIRI && IsUTF8(aURI)) {
      _retval.Assign(NS_ConvertUTF8toUCS2(aURI));
      return rv;
    }
  }

  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);

  nsCOMPtr<nsICharsetConverterManager2> charsetConverterManager;

  charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIAtom> charsetAtom;
  rv = charsetConverterManager->GetCharsetAtom2(aCharset.get(), getter_AddRefs(charsetAtom));
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
  rv = charsetConverterManager->GetUnicodeDecoder(charsetAtom,
                                                  getter_AddRefs(unicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);

  PRInt32 srcLen = aURI.Length();
  PRInt32 dstLen;
  rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
  NS_ENSURE_SUCCESS(rv, rv);

  // Allocate one element more than requested so the request is never zero
  // bytes; a zero-byte allocation may return null and would be misreported
  // as out-of-memory below.
  PRUnichar *ustr = (PRUnichar *) nsMemory::Alloc((dstLen + 1) * sizeof(PRUnichar));
  NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);

  rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);

  // Convert() updates dstLen to the number of characters written.
  if (NS_SUCCEEDED(rv))
    _retval.Assign(ustr, dstLen);

  nsMemory::Free(ustr);

  return rv;
}
|
|
|
|
|
|
|
|
/**
 * Unescapes aURIFragment and converts the result to Unicode via
 * convertURItoUnicode(), with the aIRI flag set to PR_TRUE.
 *
 * @param aCharset      charset name forwarded to convertURItoUnicode()
 * @param aURIFragment  escaped URI fragment
 * @param _retval       [out] receives the converted string
 * @return the status returned by convertURItoUnicode()
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
                                               const nsACString &aURIFragment,
                                               nsAString &_retval)
{
  // Copy the fragment, then unescape the copy.
  nsCAutoString fragment(aURIFragment);
  NS_UnescapeURL(fragment);

  nsresult status =
    convertURItoUnicode(PromiseFlatCString(aCharset), fragment, PR_TRUE, _retval);
  return status;
}
|
|
|
|
|
2002-09-08 19:08:21 +04:00
|
|
|
/**
 * Unescapes aURIFragment with the esc_AlwaysCopy | esc_OnlyNonASCII flags
 * and converts the result to Unicode via convertURItoUnicode(), with the
 * aIRI flag set to PR_TRUE.
 *
 * @param aCharset      charset name forwarded to convertURItoUnicode()
 * @param aURIFragment  escaped URI fragment
 * @param _retval       [out] receives the converted string
 * @return the status returned by convertURItoUnicode()
 */
NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
                                                  const nsACString &aURIFragment,
                                                  nsAString &_retval)
{
  // The unescaped text is written into a separate output string.
  nsCAutoString fragment;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_AlwaysCopy | esc_OnlyNonASCII, fragment);

  nsresult status =
    convertURItoUnicode(PromiseFlatCString(aCharset), fragment, PR_TRUE, _retval);
  return status;
}
|
|
|
|
|
1999-10-01 01:11:05 +04:00
|
|
|
//----------------------------------------------------------------------
|