gecko-dev/intl/uconv/src/nsTextToSubURI.cpp

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Pierre Phaneuf <pp@ludusdesign.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "nsString.h"
#include "nsIUnicodeEncoder.h"
#include "nsICharsetConverterManager.h"
#include "nsReadableUtils.h"
#include "nsITextToSubURI.h"
#include "nsIServiceManager.h"
#include "nsUConvDll.h"
#include "nsEscape.h"
#include "prmem.h"
#include "nsTextToSubURI.h"
#include "nsCRT.h"

// Class ID of the charset converter manager; ConvertAndEscape and
// UnEscapeAndConvert below use it to look the service up.
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);

// Constructor: there is nothing to initialize here.
nsTextToSubURI::nsTextToSubURI() {}
// Destructor: there is nothing to release here.
nsTextToSubURI::~nsTextToSubURI() {}

// XPCOM boilerplate: nsISupports implementation for nsTextToSubURI, with
// nsITextToSubURI as its single interface.
NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)

/**
 * Encodes the Unicode string |text| into |charset| and URL-escapes the
 * resulting bytes.
 *
 * @param charset  name of the target charset, handed to GetUnicodeEncoder
 * @param text     null-terminated Unicode input
 * @param _retval  [out] receives the string returned by nsEscape; set to
 *                 nsnull if anything fails
 * @return NS_ERROR_NULL_POINTER when _retval is null, NS_ERROR_OUT_OF_MEMORY
 *         when nsEscape returns null, otherwise the result of the service
 *         lookup / encoder calls.
 */
NS_IMETHODIMP  nsTextToSubURI::ConvertAndEscape(
  const char *charset, const PRUnichar *text, char **_retval) 
{
  if (!_retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = nsnull;

  // Look up the converter manager; it is only needed to obtain the encoder.
  nsICharsetConverterManager *converterManager;
  nsresult rv = CallGetService(kCharsetConverterManagerCID, &converterManager);
  if (NS_FAILED(rv))
    return rv;

  nsIUnicodeEncoder *encoder;
  rv = converterManager->GetUnicodeEncoder(charset, &encoder);
  NS_RELEASE(converterManager);
  if (NS_FAILED(rv))
    return rv;

  // Unencodable characters are replaced with '?'.
  rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, (PRUnichar)'?');
  if (NS_FAILED(rv)) {
    NS_RELEASE(encoder);
    return rv;
  }

  // Short results go into the stack buffer; longer ones are heap allocated.
  char stackBuf[256];
  char *dest = stackBuf;
  PRInt32 srcLen = nsCRT::strlen(text);
  PRInt32 destLen = 0;

  rv = encoder->GetMaxLength(text, srcLen, &destLen);
  if (NS_SUCCEEDED(rv)) {
    if (destLen >= 256)
      dest = (char*)PR_Malloc(destLen + 1);

    // Allocation failed: fall back to the stack buffer with a capped length.
    if (!dest) {
      destLen = 255;
      dest = stackBuf;
    }

    // Remember the capacity; Convert overwrites destLen with the bytes written.
    const PRInt32 capacity = destLen;
    rv = encoder->Convert(text, &srcLen, dest, &destLen);
    if (NS_SUCCEEDED(rv)) {
      // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
      PRInt32 tailLen = capacity - destLen;
      if (tailLen > 0 &&
          NS_SUCCEEDED(encoder->Finish((char *)(dest + destLen), &tailLen)))
        destLen += tailLen;

      dest[destLen] = '\0';
      *_retval = nsEscape(dest, url_XPAlphas);
      if (!*_retval)
        rv = NS_ERROR_OUT_OF_MEMORY;
    }
  }

  if (dest != stackBuf)
    PR_Free(dest);
  NS_RELEASE(encoder);

  return rv;
}

/**
 * URL-unescapes |text| and decodes the resulting bytes from |charset| into a
 * newly allocated, null-terminated Unicode string.
 *
 * @param charset  name of the source charset, handed to GetUnicodeDecoder
 * @param text     escaped input; it is duplicated first because unescaping
 *                 changes its argument
 * @param _retval  [out] receives a PR_Malloc'ed buffer on success; left as
 *                 nsnull on failure
 * @return NS_ERROR_NULL_POINTER when _retval is null, NS_ERROR_OUT_OF_MEMORY
 *         when an allocation fails, otherwise the result of the service
 *         lookup / decoder calls.
 */
NS_IMETHODIMP  nsTextToSubURI::UnEscapeAndConvert(
  const char *charset, const char *text, PRUnichar **_retval) 
{
  if(nsnull == _retval)
    return NS_ERROR_NULL_POINTER;
  *_retval = nsnull;
  nsresult rv = NS_OK;
  
  // unescape the string, unescape changes the input
  char *unescaped = nsCRT::strdup((char *) text);
  if (nsnull == unescaped)
    return NS_ERROR_OUT_OF_MEMORY;
  unescaped = nsUnescape(unescaped);
  NS_ASSERTION(unescaped, "nsUnescape returned null");

  // Convert from the charset to unicode
  nsCOMPtr<nsICharsetConverterManager> ccm = 
           do_GetService(kCharsetConverterManagerCID, &rv); 
  if (NS_SUCCEEDED(rv)) {
    nsIUnicodeDecoder *decoder;
    rv = ccm->GetUnicodeDecoder(charset, &decoder);
    if (NS_SUCCEEDED(rv)) {
      PRUnichar *pBuf = nsnull;
      PRInt32 len = strlen(unescaped);
      PRInt32 outlen = 0;
      if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
        // Room for outlen characters plus the terminator. The element size is
        // that of a character (PRUnichar), not of a pointer to one.
        pBuf = (PRUnichar *) PR_Malloc((outlen+1)*sizeof(PRUnichar));
        if (nsnull == pBuf)
          rv = NS_ERROR_OUT_OF_MEMORY;
        else {
          if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
            // Convert updated outlen to the number of characters written.
            pBuf[outlen] = 0;
            *_retval = pBuf;
          }
          else
            PR_Free(pBuf);  // conversion failed: do not leak the buffer
        }
      }
      NS_RELEASE(decoder);
    }
  }
  PR_Free(unescaped);

  return rv;
}

// Returns PR_TRUE when |charset| (compared case-insensitively) starts with
// "ISO-2022-" or is exactly "UTF-7" or "HZ-GB-2312"; PR_FALSE otherwise.
static PRBool statefulCharset(const char *charset)
{
  // sizeof() counts the trailing NUL, hence the -1 for the prefix length.
  if (nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) == 0)
    return PR_TRUE;

  if (nsCRT::strcasecmp(charset, "UTF-7") == 0)
    return PR_TRUE;

  if (nsCRT::strcasecmp(charset, "HZ-GB-2312") == 0)
    return PR_TRUE;

  return PR_FALSE;
}

/**
 * Converts the already-unescaped bytes in |aURI| to UTF-16.
 *
 * For charsets that statefulCharset() does not flag, pure-ASCII input is
 * copied directly, and (when |aIRI| is set) valid UTF-8 input is converted as
 * UTF-8. Everything else goes through the decoder for |aCharset|.
 *
 * @param aCharset  charset name; must be non-empty when a decoder is needed
 * @param aURI      bytes to convert
 * @param aIRI      enables the UTF-8 shortcut
 * @param _retval   [out] converted string; assigned only on success
 * @return NS_ERROR_INVALID_ARG for an empty charset on the decoder path,
 *         NS_ERROR_OUT_OF_MEMORY on allocation failure, otherwise the result
 *         of the service lookup / decoder calls (the decoder's Convert result
 *         is returned unchanged).
 */
nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
                                             const nsAFlatCString &aURI, 
                                             PRBool aIRI, 
                                             nsAString &_retval)
{
  // check for 7bit encoding the data may not be ASCII after we decode
  const PRBool stateful = statefulCharset(aCharset.get());

  if (!stateful) {
    if (IsASCII(aURI)) {
      CopyASCIItoUTF16(aURI, _retval);
      return NS_OK;
    }

    if (aIRI && IsUTF8(aURI)) {
      CopyUTF8toUTF16(aURI, _retval);
      return NS_OK;
    }
  }

  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);

  nsresult rv = NS_OK;
  nsCOMPtr<nsICharsetConverterManager> converterManager =
    do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIUnicodeDecoder> decoder;
  rv = converterManager->GetUnicodeDecoder(aCharset.get(),
                                           getter_AddRefs(decoder));
  NS_ENSURE_SUCCESS(rv, rv);

  // Ask the decoder how many characters the output may need.
  PRInt32 inputLen = aURI.Length();
  PRInt32 outputLen;
  rv = decoder->GetMaxLength(aURI.get(), inputLen, &outputLen);
  NS_ENSURE_SUCCESS(rv, rv);

  PRUnichar *ustr = (PRUnichar *) nsMemory::Alloc(outputLen * sizeof(PRUnichar));
  NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);

  // Convert updates outputLen to the number of characters actually produced.
  rv = decoder->Convert(aURI.get(), &inputLen, ustr, &outputLen);
  if (NS_SUCCEEDED(rv))
    _retval.Assign(ustr, outputLen);

  nsMemory::Free(ustr);

  return rv;
}

/**
 * Unescapes |aURIFragment| for display and converts it to UTF-16 using
 * |aCharset|. If the conversion does not return exactly NS_OK, the original
 * (still escaped) fragment is converted as UTF-8 instead.
 *
 * @param aCharset      charset used to decode the unescaped bytes
 * @param aURIFragment  escaped URI fragment
 * @param _retval       [out] string for the UI
 * @return always NS_OK
 */
NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, 
                                                const nsACString &aURIFragment, 
                                                nsAString &_retval)
{
  // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
  nsCAutoString unescapedSpec;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment), 
                 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);

  const nsresult convertResult =
    convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval);

  // Compare against NS_OK instead of using NS_FAILED: an incomplete
  // multi-byte sequence also counts as failure in this context.
  if (convertResult != NS_OK) {
    // in case of failure, return escaped URI
    // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
    CopyUTF8toUTF16(aURIFragment, _retval); 
  }

  return NS_OK;
}

/**
 * Unescapes |aURIFragment| with the esc_OnlyNonASCII flag and converts the
 * result to UTF-16 using |aCharset|.
 *
 * @param aCharset      charset used to decode the unescaped bytes
 * @param aURIFragment  escaped URI fragment
 * @param _retval       [out] converted string
 * @return the result of convertURItoUnicode
 */
NS_IMETHODIMP  nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, 
                                                   const nsACString &aURIFragment, 
                                                   nsAString &_retval)
{
  nsCAutoString unescapedSpec;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);

  const nsresult rv =
    convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval);
  return rv;
}

//----------------------------------------------------------------------
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`/* -- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
			`/* *** BEGIN LICENSE BLOCK ***`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
1st check in 1999-09-21 04:30:12 +04:00			`*`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
1st check in 1999-09-21 04:30:12 +04:00			`*`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
1st check in 1999-09-21 04:30:12 +04:00			`*`
updated xPL license boilerplate to v1.1, a=chofmann@netscape.com,r=endico@mozilla.org 1999-11-06 06:43:54 +03:00			`* The Original Code is mozilla.org code.`
			`*`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* The Initial Developer of the Original Code is`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`* Netscape Communications Corporation.`
			`* Portions created by the Initial Developer are Copyright (C) 1998`
			`* the Initial Developer. All Rights Reserved.`
updated xPL license boilerplate to v1.1, a=chofmann@netscape.com,r=endico@mozilla.org 1999-11-06 06:43:54 +03:00			`*`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`* Contributor(s):`
Converting ::GetIID() into NS_GET_IID(). Bug #20232. r=scc, r=mozbot 2000-02-03 01:24:56 +03:00			`* Pierre Phaneuf <pp@ludusdesign.com>`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* either of the GNU General Public License Version 2 or later (the "GPL"),`
			`* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* use your version of this file under the terms of the MPL, indicate your`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
Bug 236613: change to MPL/LGPL/GPL tri-license. 2004-04-18 18:21:17 +04:00			`* the terms of any one of the MPL, the GPL or the LGPL.`
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`*`
			`* *** END LICENSE BLOCK *** */`
1st check in 1999-09-21 04:30:12 +04:00			`#include "nsString.h"`
			`#include "nsIUnicodeEncoder.h"`
			`#include "nsICharsetConverterManager.h"`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`#include "nsReadableUtils.h"`
1st check in 1999-09-21 04:30:12 +04:00			`#include "nsITextToSubURI.h"`
			`#include "nsIServiceManager.h"`
			`#include "nsUConvDll.h"`
			`#include "nsEscape.h"`
			`#include "prmem.h"`
Bug 110486 Removing NSGetFactory from UCONV /r=shanjian; /sr=brendan 2001-12-08 03:25:28 +03:00			`#include "nsTextToSubURI.h"`
Fixes mozilla/strings requiring unfrozen nsCRT class. patch by scc, r=dougt, sr=jag, b=136756 2002-05-15 22:55:21 +04:00			`#include "nsCRT.h"`
1st check in 1999-09-21 04:30:12 +04:00
			`static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);`

			`nsTextToSubURI::nsTextToSubURI()`
			`{`
			`}`
			`nsTextToSubURI::~nsTextToSubURI()`
			`{`
			`}`

#45797 - fix consumers of NS_IMPL_ISUPPORTS r=dveditz a=self 2000-11-17 11:06:12 +03:00			`NS_IMPL_ISUPPORTS1(nsTextToSubURI, nsITextToSubURI)`
1st check in 1999-09-21 04:30:12 +04:00
			`NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(`
			`const char charset, const PRUnichar text, char **_retval)`
			`{`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`if(nsnull == _retval)`
			`return NS_ERROR_NULL_POINTER;`
1st check in 1999-09-21 04:30:12 +04:00			`*_retval = nsnull;`
			`nsresult rv = NS_OK;`

			`// Get Charset, get the encoder.`
fixes bug 219400 "remove callers of nsServiceManager:: methods" r=bsmedberg 2004-11-08 02:59:35 +03:00			`nsICharsetConverterManager *ccm;`
			`rv = CallGetService(kCharsetConverterManagerCID, &ccm);`
			`if(NS_SUCCEEDED(rv)) {`
			`nsIUnicodeEncoder *encoder;`
fix for bug 206379: - combine nsICharsetConverterManager2 and nsICharsetConverterManager - get rid of nsIAtom in most of the methods - provide versions of getUnicodeDecoder/Encoder which don't do alias resolution - change all charset types to ASCII strings - clean up some other i18n APIs which could be simplified - fix all consumers of all changed i18n interfaces r=jshin, smontagu rs=sfraser 2003-06-11 22:16:03 +04:00			`rv = ccm->GetUnicodeEncoder(charset, &encoder);`
fixes bug 219400 "remove callers of nsServiceManager:: methods" r=bsmedberg 2004-11-08 02:59:35 +03:00			`NS_RELEASE(ccm);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`if (NS_SUCCEEDED(rv)) {`
			`rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nsnull, (PRUnichar)'?');`
			`if(NS_SUCCEEDED(rv))`
			`{`
			`char buf[256];`
			`char *pBuf = buf;`
			`PRInt32 ulen = nsCRT::strlen(text);`
			`PRInt32 outlen = 0;`
			`if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen)))`
			`{`
			`if(outlen >= 256) {`
			`pBuf = (char*)PR_Malloc(outlen+1);`
			`}`
			`if(nsnull == pBuf) {`
			`outlen = 255;`
			`pBuf = buf;`
			`}`
Call nsIUnicharEncoder::Finish to ensure the converted string is terminated, bug 179392, r=shanjian, sr=bzbarsky 2002-11-14 00:56:01 +03:00			`PRInt32 bufLen = outlen;`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {`
Call nsIUnicharEncoder::Finish to ensure the converted string is terminated, bug 179392, r=shanjian, sr=bzbarsky 2002-11-14 00:56:01 +03:00			`// put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary`
			`PRInt32 finLen = bufLen - outlen;`
			`if (finLen > 0) {`
			`if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))`
			`outlen += finLen;`
			`}`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`pBuf[outlen] = '\0';`
			`*_retval = nsEscape(pBuf, url_XPAlphas);`
			`if(nsnull == *_retval)`
			`rv = NS_ERROR_OUT_OF_MEMORY;`
			`}`
			`}`
			`if(pBuf != buf)`
			`PR_Free(pBuf);`
			`}`
fixes bug 219400 "remove callers of nsServiceManager:: methods" r=bsmedberg 2004-11-08 02:59:35 +03:00			`NS_RELEASE(encoder);`
1st check in 1999-09-21 04:30:12 +04:00			`}`
			`}`

			`return rv;`
			`}`
wired up nsTextToSubURI to the dll, remove unnecessary reghac2.h 1999-09-21 09:13:53 +04:00
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(`
			`const char charset, const char text, PRUnichar **_retval)`
			`{`
			`if(nsnull == _retval)`
			`return NS_ERROR_NULL_POINTER;`
			`*_retval = nsnull;`
			`nsresult rv = NS_OK;`

			`// unescape the string, unescape changes the input`
			`char unescaped = nsCRT::strdup((char ) text);`
			`if (nsnull == unescaped)`
			`return NS_ERROR_OUT_OF_MEMORY;`
			`unescaped = nsUnescape(unescaped);`
			`NS_ASSERTION(unescaped, "nsUnescape returned null");`

			`// Convert from the charset to unicode`
Bug 86734: Remove NS_WITH_SERVICE. r=dbaron, rs=scc, a=asa 2001-07-25 11:54:28 +04:00			`nsCOMPtr<nsICharsetConverterManager> ccm =`
			`do_GetService(kCharsetConverterManagerCID, &rv);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`if (NS_SUCCEEDED(rv)) {`
			`nsIUnicodeDecoder *decoder;`
fix for bug 206379: - combine nsICharsetConverterManager2 and nsICharsetConverterManager - get rid of nsIAtom in most of the methods - provide versions of getUnicodeDecoder/Encoder which don't do alias resolution - change all charset types to ASCII strings - clean up some other i18n APIs which could be simplified - fix all consumers of all changed i18n interfaces r=jshin, smontagu rs=sfraser 2003-06-11 22:16:03 +04:00			`rv = ccm->GetUnicodeDecoder(charset, &decoder);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`if (NS_SUCCEEDED(rv)) {`
			`PRUnichar *pBuf = nsnull;`
eliminate nsCRT::strlen for char* strings (part 2), bug 124536 r=dp sr=brendan 2002-02-19 10:43:41 +03:00			`PRInt32 len = strlen(unescaped);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`PRInt32 outlen = 0;`
			`if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {`
			`pBuf = (PRUnichar ) PR_Malloc((outlen+1)sizeof(PRUnichar*));`
			`if (nsnull == pBuf)`
			`rv = NS_ERROR_OUT_OF_MEMORY;`
			`else {`
			`if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {`
			`pBuf[outlen] = 0;`
			`*_retval = pBuf;`
			`}`
Bug 333298: nsTextToSubURI::UnEscapeAndConvert leaks pBuf if decoder->Convert fails, patch by Ryan Flint <rflint@dslr.net>, r=timeless, sr=jag 2006-04-14 03:38:55 +04:00			`else`
			`PR_Free(pBuf);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`}`
			`}`
fixes bug 219400 "remove callers of nsServiceManager:: methods" r=bsmedberg 2004-11-08 02:59:35 +03:00			`NS_RELEASE(decoder);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00			`}`
			`}`
fixes bug 219400 "remove callers of nsServiceManager:: methods" r=bsmedberg 2004-11-08 02:59:35 +03:00			`PR_Free(unescaped);`
Added a function to url unescape and convert to unicode, bug 25034, r=ftang. 2000-02-04 02:18:07 +03:00
			`return rv;`
			`}`

Added a function to unescape 8bit only, 7bit encoding support. bug 161479, r=shanjian, sr=jst, a=dbaron. 2002-09-08 19:08:21 +04:00			`static PRBool statefulCharset(const char *charset)`
			`{`
			`if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) \|\|`
			`!nsCRT::strcasecmp(charset, "UTF-7") \|\|`
			`!nsCRT::strcasecmp(charset, "HZ-GB-2312"))`
			`return PR_TRUE;`

			`return PR_FALSE;`
			`}`

Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,`
			`const nsAFlatCString &aURI,`
			`PRBool aIRI,`
			`nsAString &_retval)`
			`{`
			`nsresult rv = NS_OK;`

Added a function to unescape 8bit only, 7bit encoding support. bug 161479, r=shanjian, sr=jst, a=dbaron. 2002-09-08 19:08:21 +04:00			`// check for 7bit encoding the data may not be ASCII after we decode`
			`PRBool isStatefulCharset = statefulCharset(aCharset.get());`

			`if (!isStatefulCharset && IsASCII(aURI)) {`
Patch for bug 209699 (convert some consumers over to CopyUTF8toUTF16 / CopyUTF16toUTF8). r=jshin, sr=jst. 2003-12-23 19:48:40 +03:00			`CopyASCIItoUTF16(aURI, _retval);`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`return rv;`
			`}`

Added a function to unescape 8bit only, 7bit encoding support. bug 161479, r=shanjian, sr=jst, a=dbaron. 2002-09-08 19:08:21 +04:00			`if (!isStatefulCharset && aIRI) {`
bug 191542 : Add UTF-8 equivalent of \|IsASCII\|, IsUTF8. r=smontagu, sr=alecf 2003-03-25 11:11:13 +03:00			`if (IsUTF8(aURI)) {`
Patch for bug 209699 (convert some consumers over to CopyUTF8toUTF16 / CopyUTF16toUTF8). r=jshin, sr=jst. 2003-12-23 19:48:40 +03:00			`CopyUTF8toUTF16(aURI, _retval);`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`return rv;`
			`}`
			`}`

bug 163998: URL-unescape the image URL for rendering in the title bar (r=nhotta,jst, sr=darin, a=asa) 2003-02-19 14:14:35 +03:00			`// empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.`
			`NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);`

fix for bug 206379: - combine nsICharsetConverterManager2 and nsICharsetConverterManager - get rid of nsIAtom in most of the methods - provide versions of getUnicodeDecoder/Encoder which don't do alias resolution - change all charset types to ASCII strings - clean up some other i18n APIs which could be simplified - fix all consumers of all changed i18n interfaces r=jshin, smontagu rs=sfraser 2003-06-11 22:16:03 +04:00			`nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00
			`charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);`
			`NS_ENSURE_SUCCESS(rv, rv);`

			`nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;`
fix for bug 206379: - combine nsICharsetConverterManager2 and nsICharsetConverterManager - get rid of nsIAtom in most of the methods - provide versions of getUnicodeDecoder/Encoder which don't do alias resolution - change all charset types to ASCII strings - clean up some other i18n APIs which could be simplified - fix all consumers of all changed i18n interfaces r=jshin, smontagu rs=sfraser 2003-06-11 22:16:03 +04:00			`rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(),`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`getter_AddRefs(unicodeDecoder));`
			`NS_ENSURE_SUCCESS(rv, rv);`

			`PRInt32 srcLen = aURI.Length();`
			`PRInt32 dstLen;`
			`rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);`
			`NS_ENSURE_SUCCESS(rv, rv);`

			`PRUnichar ustr = (PRUnichar ) nsMemory::Alloc(dstLen * sizeof(PRUnichar));`
			`NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);`

			`rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);`

			`if (NS_SUCCEEDED(rv))`
			`_retval.Assign(ustr, dstLen);`

			`nsMemory::Free(ustr);`

			`return rv;`
			`}`

			`NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,`
			`const nsACString &aURIFragment,`
			`nsAString &_retval)`
			`{`
bug 228176 exclude control characters from unescaping URLs for the UI : r=darin, sr=jst 2004-01-07 06:57:20 +03:00			`nsCAutoString unescapedSpec;`
bug 228176 : in the previous checkin, missed the reviewer comment about the name change (exclude -> skip) 2004-01-07 07:17:40 +03:00			`// skip control octets (0x00 - 0x1f and 0x7f) when unescaping`
bug 228176 exclude control characters from unescaping URLs for the UI : r=darin, sr=jst 2004-01-07 06:57:20 +03:00			`NS_UnescapeURL(PromiseFlatCString(aURIFragment),`
bug 228176 : in the previous checkin, missed the reviewer comment about the name change (exclude -> skip) 2004-01-07 07:17:40 +03:00			`esc_SkipControl \| esc_AlwaysCopy, unescapedSpec);`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00
bug 244754 : URL is not shown in the status bar when hovering over a url-escaped URL in an encoding different from the document enecoding (r=darin, sr=bzbarsky) 2005-02-22 21:25:12 +03:00			`// in case of failure, return escaped URI`
Bug 339002 Last %-escaped character in URL disappears in status bar if page displayed as UTF-8. r=jshin, sr=roc 2006-05-28 09:39:25 +04:00			`// Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte`
			`// sequences are also considered failure in this context`
			`if (convertURItoUnicode(`
			`PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval)`
			`!= NS_OK)`
bug 244754 : URL is not shown in the status bar when hovering over a url-escaped URL in an encoding different from the document enecoding (r=darin, sr=bzbarsky) 2005-02-22 21:25:12 +03:00			`// assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8`
			`CopyUTF8toUTF16(aURIFragment, _retval);`
			`return NS_OK;`
Added unescape function which takes a charset to uconv, changed uriloader to use the uconv unescape to handle unescape non Ascii URI correctly. bug 155569, r=ftang, sr=bzbarsky. 2002-08-12 23:23:22 +04:00			`}`

Added a function to unescape 8bit only, 7bit encoding support. bug 161479, r=shanjian, sr=jst, a=dbaron. 2002-09-08 19:08:21 +04:00			`NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,`
			`const nsACString &aURIFragment,`
			`nsAString &_retval)`
			`{`
			`nsCAutoString unescapedSpec;`
			`NS_UnescapeURL(PromiseFlatCString(aURIFragment),`
			`esc_AlwaysCopy \| esc_OnlyNonASCII, unescapedSpec);`

			`return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, PR_TRUE, _retval);`
			`}`

Convert to module from component 1999-10-01 01:11:05 +04:00			`//----------------------------------------------------------------------`