gecko-dev/intl/uconv/ucvcn/nsHZToUnicode.cpp

/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
 * A character set converter from HZ to Unicode.
 * 
 *
 * @created         08/Sept/1999
 * @author  Yueheng Xu, Yueheng.Xu@intel.com
 *
 * Note: in this HZ-GB-2312 converter, we accept a string composed of 7-bit HZ 
 *       encoded Chinese chars,as it is defined in RFC1843 available at 
 *       http://www.cis.ohio-state.edu/htbin/rfc/rfc1843.html
 *       and RFC1842 available at http://www.cis.ohio-state.edu/htbin/rfc/rfc1842.html.
 *        
 *       Earlier versions of the converter said:
 *        "In an effort to match the similar extended capability of Microsoft 
 *         Internet Explorer 5.0. We also accept the 8-bit GB encoded chars
 *         mixed in a HZ string. 
 *         But this should not be a recommended practice for HTML authors."
 *       However, testing in current versions of IE shows that it only accepts
 *       8-bit characters when the converter is in GB state, and when in ASCII
 *       state each single 8-bit character is converted to U+FFFD
 *
 *       The conversion priorities are as follows: first convert 8-bit GB code; then,
 *       consume HZ ESC sequences such as '~{', '~}', '~~'; then, depending on the current
 *       state ( default to ASCII state ) of the string, each 7-bit char is converted as an 
 *       ASCII, or two 7-bit chars are converted into a Chinese character.
 */


#include "nsHZToUnicode.h"
#include "gbku.h"
#include "mozilla/Telemetry.h"

//----------------------------------------------------------------------
// Class nsHZToUnicode [implementation]

//----------------------------------------------------------------------
// Subclassing of nsBufferDecoderSupport class [implementation]

// Decoder state kept in mHZState. The low bits hold the current encoding
// mode and HZ_STATE_ODD_BYTE_FLAG is OR-ed in while the first byte of a
// two-byte sequence has been read and the second one is still awaited.

// GB mode: entered when the escape '~{' is seen.
#define HZ_STATE_GB     1
// ASCII mode: the initial mode, re-entered when the escape '~}' is seen.
#define HZ_STATE_ASCII  2
// Set in mHZState while a lead byte is pending.
#define HZ_STATE_ODD_BYTE_FLAG 0x80
// First byte of every HZ escape sequence.
#define HZLEAD1 '~'
// Second byte of '~{' (switch to GB mode).
#define HZLEAD2 '{'
// Second byte of '~}' (switch to ASCII mode).
#define HZLEAD3 '}'
// Non-zero when a lead byte is pending.
#define HZ_ODD_BYTE_STATE (mHZState & (HZ_STATE_ODD_BYTE_FLAG))
// Current encoding mode (HZ_STATE_GB or HZ_STATE_ASCII) with the pending-byte
// flag masked out.
#define HZ_ENCODING_STATE (mHZState & ~(HZ_STATE_ODD_BYTE_FLAG))

using namespace mozilla;

/**
 * Creates an HZ decoder in its initial state: ASCII mode, no pending lead
 * byte and an empty run counter. Each construction is also reported to the
 * DECODER_INSTANTIATED_HZ telemetry probe.
 */
nsHZToUnicode::nsHZToUnicode()
  : nsBufferDecoderSupport(1)
{
  // Count this instantiation for telemetry.
  Telemetry::Accumulate(Telemetry::DECODER_INSTANTIATED_HZ, true);

  // Nothing has been read yet: no lead byte is pending and no character has
  // been decoded in the current run.
  mOddByte = 0;
  mRunLength = 0;

  // The HZ specification says a stream starts out in ASCII mode.
  mHZState = HZ_STATE_ASCII;
}

/**
 * Decodes a chunk of HZ-encoded bytes into UTF-16 code units.
 *
 * Overrides ConvertNoBuff() (the original comment locates the overridden
 * version in nsUCvCnSupport.cpp).
 *
 * The decoder is a small state machine whose state survives between calls in
 * mHZState (encoding mode plus "lead byte pending" flag), mOddByte (the
 * pending lead byte) and mRunLength (characters produced since the last mode
 * switch), so a two-byte sequence may be split across two input chunks.
 *
 * @param aSrc        input bytes.
 * @param aSrcLength  in: number of bytes available at aSrc;
 *                    out: number of bytes consumed.
 * @param aDest       output buffer.
 * @param aDestLength in: capacity of aDest in code units;
 *                    out: number of code units written.
 * @return NS_OK_UDEC_MOREOUTPUT if the output buffer filled up while input
 *         was still left to process, NS_OK otherwise.
 */
NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(
  const char* aSrc, 
  int32_t * aSrcLength, 
  PRUnichar *aDest, 
  int32_t * aDestLength)
{
  const int32_t iSrcLength = *aSrcLength;
  int32_t iDestlen = 0;
  *aSrcLength=0;
  nsresult res = NS_OK;
  // Resume with the lead byte (if any) left pending by the previous call.
  char oddByte = mOddByte;

  for (int32_t i = 0; i < iSrcLength; i++) {
    // Every iteration writes at most one code unit, so checking the capacity
    // once up front is enough to keep the writes below in bounds.
    if (iDestlen >= (*aDestLength)) {
      res = NS_OK_UDEC_MOREOUTPUT;
      break;
    }

    char srcByte = *aSrc++;
    (*aSrcLength)++;
    
    if (!HZ_ODD_BYTE_STATE) {
      // No lead byte pending. '~' always starts an escape sequence; in GB
      // mode a 7-bit (0x21-0x7E) or 8-bit (0x81-0xFE) byte starts a two-byte
      // character. Remember it and wait for the second byte.
      if (srcByte == HZLEAD1 || 
          (HZ_ENCODING_STATE == HZ_STATE_GB && 
           (UINT8_IN_RANGE(0x21, srcByte, 0x7E) ||
            UINT8_IN_RANGE(0x81, srcByte, 0xFE)))) {
        oddByte = srcByte;
        mHZState |= HZ_STATE_ODD_BYTE_FLAG;
      } else {
        // Single byte: 7-bit values pass through unchanged, anything with
        // the high bit set is an error.
        *aDest++ = (srcByte & 0x80) ? UCS2_NO_MAPPING :
                                      CAST_CHAR_TO_UNICHAR(srcByte);
        iDestlen++;
      }
    } else {
      // srcByte is the second byte of a sequence started by oddByte.
      if (oddByte & 0x80) {
        // Accept legal 8-bit GB 2312-80 sequences in GB mode only
        NS_ASSERTION(HZ_ENCODING_STATE == HZ_STATE_GB,
                     "Invalid lead byte in ASCII mode");                    
        *aDest++ = (UINT8_IN_RANGE(0x81, oddByte, 0xFE) &&
                    UINT8_IN_RANGE(0x40, srcByte, 0xFE)) ?
                     mUtil.GBKCharToUnicode(oddByte, srcByte) : UCS2_NO_MAPPING;
        mRunLength++;
        iDestlen++;
      // otherwise, it is a 7-bit byte 
      // The source will be an ASCII or a 7-bit HZ code depending on oddByte
      } else if (oddByte == HZLEAD1) { // if it is lead by '~'
        switch (srcByte) {
          case HZLEAD2: 
            // we got a '~{'
            // we are switching to HZ state
            // (assigning the bare mode value also drops the pending flag)
            mHZState = HZ_STATE_GB;
            mRunLength = 0;
            break;

          case HZLEAD3: 
            // we got a '~}'
            // we are switching to ASCII state
            mHZState = HZ_STATE_ASCII;
            // A '~}' that closes a run in which nothing was decoded is
            // reported as an error.
            if (mRunLength == 0) {
              *aDest++ = UCS2_NO_MAPPING;
              iDestlen++;
            }
            mRunLength = 0;
            break;

          case HZLEAD1: 
            // we got a '~~', process like an ASCII, but no state change
            *aDest++ = CAST_CHAR_TO_UNICHAR(srcByte);
            iDestlen++;
            mRunLength++;
            break;

          default:
            // Undefined ESC sequence '~X': treat as an error if X is a
            // printable character or we are in ASCII mode, and resynchronize
            // on the second character.
            // 
            // N.B. For compatibility with other implementations, we treat '~\n'
            // as an illegal sequence even though RFC1843 permits it, and for
            // the same reason we pass through control characters including '\n'
            // and ' ' even in GB mode.
            //
            // srcByte is a plain char, whose signedness is
            // implementation-defined. The range is therefore spelled out on
            // unsigned values so that bytes >= 0x80 are treated the same way
            // on every platform; 0x21-0x7F is exactly what the former
            // "srcByte > 0x20" test accepted where char is signed.
            if (UINT8_IN_RANGE(0x21, srcByte, 0x7F) ||
                HZ_ENCODING_STATE == HZ_STATE_ASCII) {
              *aDest++ = UCS2_NO_MAPPING;
              iDestlen++;
            }
            // Un-read X so the next iteration handles it as a fresh byte
            // (the pending flag is cleared below).
            aSrc--;
            (*aSrcLength)--;
            i--;
            break;
        }
      } else if (HZ_ENCODING_STATE == HZ_STATE_GB) {
        // Two 7-bit bytes in GB mode: set the high bits to obtain the
        // equivalent 8-bit code before looking it up.
        *aDest++ = (UINT8_IN_RANGE(0x21, oddByte, 0x7E) &&
                    UINT8_IN_RANGE(0x21, srcByte, 0x7E)) ?
                     mUtil.GBKCharToUnicode(oddByte|0x80, srcByte|0x80) :
                     UCS2_NO_MAPPING;
        mRunLength++;
        iDestlen++;
      } else {
        NS_NOTREACHED("2-byte sequence that we don't know how to handle");
        *aDest++ = UCS2_NO_MAPPING;
        iDestlen++;
      }
      // The two-byte sequence has been dealt with.
      oddByte = 0;
      mHZState &= ~HZ_STATE_ODD_BYTE_FLAG;
    }
  } // for loop
  // Keep a still-pending lead byte for the next call.
  mOddByte = HZ_ODD_BYTE_STATE ? oddByte : 0;
  *aDestLength = iDestlen;
  return res;
}
License changes, take 2. Bug 98089. mozilla/include/, /mozilla/htmlparser/, /mozilla/intl/ (part 1). 2001-09-26 04:40:45 +04:00			`/* -- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
Bug 716478 - update licence to MPL 2. 2012-05-21 15:12:37 +04:00			`/* This Source Code Form is subject to the terms of the Mozilla Public`
			`* License, v. 2.0. If a copy of the MPL was not distributed with this`
			`* file, You can obtain one at http://mozilla.org/MPL/2.0/. */`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00			`/**`
Correctly check for the HZ converter engine state. 1999-10-21 04:03:52 +04:00			`* A character set converter from HZ to Unicode.`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00			`*`
			`*`
			`* @created 08/Sept/1999`
			`* @author Yueheng Xu, Yueheng.Xu@intel.com`
			`*`
			`* Note: in this HZ-GB-2312 converter, we accept a string composed of 7-bit HZ`
			`* encoded Chinese chars,as it is defined in RFC1843 available at`
			`* http://www.cis.ohio-state.edu/htbin/rfc/rfc1843.html`
			`* and RFC1842 available at http://www.cis.ohio-state.edu/htbin/rfc/rfc1842.html.`
			`*`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`* Earlier versions of the converter said:`
			`* "In an effort to match the similar extended capability of Microsoft`
			`* Internet Explorer 5.0. We also accept the 8-bit GB encoded chars`
			`* mixed in a HZ string.`
			`* But this should not be a recommendedd practice for HTML authors."`
			`* However, testing in current versions of IE shows that it only accepts`
			`* 8-bit characters when the converter is in GB state, and when in ASCII`
			`* state each single 8-bit character is converted to U+FFFD`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00			`*`
			`* The priority of converting are as follows: first convert 8-bit GB code; then,`
			`* consume HZ ESC sequences such as '~{', '~}', '~~'; then, depending on the current`
			`* state ( default to ASCII state ) of the string, each 7-bit char is converted as an`
			`* ASCII, or two 7-bit chars are converted into a Chinese character.`
			`*/`



			`#include "nsHZToUnicode.h"`
			`#include "gbku.h"`
Bug 935453 - Gather telemetry about dangerous encodings and encodings we might remove. r=emk,sstamm. 2013-11-11 19:04:02 +04:00			`#include "mozilla/Telemetry.h"`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00
			`//----------------------------------------------------------------------`
			`// Class nsHZToUnicode [implementation]`

			`//----------------------------------------------------------------------`
			`// Subclassing of nsTablesDecoderSupport class [implementation]`

Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`#define HZ_STATE_GB 1`
			`#define HZ_STATE_ASCII 2`
			`#define HZ_STATE_ODD_BYTE_FLAG 0x80`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00			`#define HZLEAD1 '~'`
			`#define HZLEAD2 '{'`
			`#define HZLEAD3 '}'`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`#define HZ_ODD_BYTE_STATE (mHZState & (HZ_STATE_ODD_BYTE_FLAG))`
			`#define HZ_ENCODING_STATE (mHZState & ~(HZ_STATE_ODD_BYTE_FLAG))`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00
Bug 935453 - Gather telemetry about dangerous encodings and encodings we might remove. r=emk,sstamm. 2013-11-11 19:04:02 +04:00			`using namespace mozilla;`

fix for bug 157993 - combine all unicode converters into a single library, and share GetMaxLength() implementation between all converters that can support it. r=ftang, sr=blizzard 2002-08-12 23:16:16 +04:00			`nsHZToUnicode::nsHZToUnicode() : nsBufferDecoderSupport(1)`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`{`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`mHZState = HZ_STATE_ASCII; // per HZ spec, default to ASCII state`
Treat all empty and incomplete sequences as encoding errors, and some other clean-up. Bug 381412, r=jshin, sr=dveditz, b1.9=jst 2007-09-06 09:02:17 +04:00			`mRunLength = 0;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`mOddByte = 0;`
Bug 935453 - Gather telemetry about dangerous encodings and encodings we might remove. r=emk,sstamm. 2013-11-11 19:04:02 +04:00			`Telemetry::Accumulate(Telemetry::DECODER_INSTANTIATED_HZ, true);`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`}`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`//Overwriting the ConvertNoBuff() in nsUCvCnSupport.cpp.`
			`NS_IMETHODIMP nsHZToUnicode::ConvertNoBuff(`
			`const char* aSrc,`
Bug 579517 - Part 1: Automated conversion of NSPR numeric types to stdint types in Gecko; r=bsmedberg This patch was generated by a script. Here's the source of the script for future reference: function convert() { echo "Converting $1 to $2..." find . ! -wholename "nsprpub" \ ! -wholename "security/nss" \ ! -wholename "/.hg" \ ! -wholename "obj-ff-dbg" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname ".cpp" \ -o -iname ".h" \ -o -iname ".c" \ -o -iname ".cc" \ -o -iname ".idl" \ -o -iname ".ipdl" \ -o -iname ".ipdlh" \ -o -iname "*.mm" \) \| \ xargs -n 1 sed -i -e "s/\b$1\b/$2/g" } convert PRInt8 int8_t convert PRUint8 uint8_t convert PRInt16 int16_t convert PRUint16 uint16_t convert PRInt32 int32_t convert PRUint32 uint32_t convert PRInt64 int64_t convert PRUint64 uint64_t convert PRIntn int convert PRUintn unsigned convert PRSize size_t convert PROffset32 int32_t convert PROffset64 int64_t convert PRPtrdiff ptrdiff_t convert PRFloat64 double 2012-08-22 19:56:38 +04:00			`int32_t * aSrcLength,`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`PRUnichar *aDest,`
Bug 579517 - Part 1: Automated conversion of NSPR numeric types to stdint types in Gecko; r=bsmedberg This patch was generated by a script. Here's the source of the script for future reference: function convert() { echo "Converting $1 to $2..." find . ! -wholename "nsprpub" \ ! -wholename "security/nss" \ ! -wholename "/.hg" \ ! -wholename "obj-ff-dbg" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname ".cpp" \ -o -iname ".h" \ -o -iname ".c" \ -o -iname ".cc" \ -o -iname ".idl" \ -o -iname ".ipdl" \ -o -iname ".ipdlh" \ -o -iname "*.mm" \) \| \ xargs -n 1 sed -i -e "s/\b$1\b/$2/g" } convert PRInt8 int8_t convert PRUint8 uint8_t convert PRInt16 int16_t convert PRUint16 uint16_t convert PRInt32 int32_t convert PRUint32 uint32_t convert PRInt64 int64_t convert PRUint64 uint64_t convert PRIntn int convert PRUintn unsigned convert PRSize size_t convert PROffset32 int32_t convert PROffset64 int64_t convert PRPtrdiff ptrdiff_t convert PRFloat64 double 2012-08-22 19:56:38 +04:00			`int32_t * aDestLength)`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`{`
Bug 579517 - Part 1: Automated conversion of NSPR numeric types to stdint types in Gecko; r=bsmedberg This patch was generated by a script. Here's the source of the script for future reference: function convert() { echo "Converting $1 to $2..." find . ! -wholename "nsprpub" \ ! -wholename "security/nss" \ ! -wholename "/.hg" \ ! -wholename "obj-ff-dbg" \ ! -name nsXPCOMCID.h \ ! -name prtypes.h \ -type f \ \( -iname ".cpp" \ -o -iname ".h" \ -o -iname ".c" \ -o -iname ".cc" \ -o -iname ".idl" \ -o -iname ".ipdl" \ -o -iname ".ipdlh" \ -o -iname "*.mm" \) \| \ xargs -n 1 sed -i -e "s/\b$1\b/$2/g" } convert PRInt8 int8_t convert PRUint8 uint8_t convert PRInt16 int16_t convert PRUint16 uint16_t convert PRInt32 int32_t convert PRUint32 uint32_t convert PRInt64 int64_t convert PRUint64 uint64_t convert PRIntn int convert PRUintn unsigned convert PRSize size_t convert PROffset32 int32_t convert PROffset64 int64_t convert PRPtrdiff ptrdiff_t convert PRFloat64 double 2012-08-22 19:56:38 +04:00			`int32_t i=0;`
			`int32_t iSrcLength = *aSrcLength;`
			`int32_t iDestlen = 0;`
Bug 75707: Some BIG5 characters can not be displayed properly in Solaris Trunk add a boolean value to valid the med checking in for ftang 2001-07-14 00:42:34 +04:00			`*aSrcLength=0;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`nsresult res = NS_OK;`
			`char oddByte = mOddByte;`

			`for (i=0; i<iSrcLength; i++) {`
			`if (iDestlen >= (*aDestLength)) {`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`res = NS_OK_UDEC_MOREOUTPUT;`
			`break;`
			`}`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00
			`char srcByte = *aSrc++;`
			`(*aSrcLength)++;`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`if (!HZ_ODD_BYTE_STATE) {`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`if (srcByte == HZLEAD1 \|\|`
			`(HZ_ENCODING_STATE == HZ_STATE_GB &&`
			`(UINT8_IN_RANGE(0x21, srcByte, 0x7E) \|\|`
			`UINT8_IN_RANGE(0x81, srcByte, 0xFE)))) {`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`oddByte = srcByte;`
			`mHZState \|= HZ_STATE_ODD_BYTE_FLAG;`
Treat all empty and incomplete sequences as encoding errors, and some other clean-up. Bug 381412, r=jshin, sr=dveditz, b1.9=jst 2007-09-06 09:02:17 +04:00			`} else {`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`*aDest++ = (srcByte & 0x80) ? UCS2_NO_MAPPING :`
			`CAST_CHAR_TO_UNICHAR(srcByte);`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`iDestlen++;`
Treat all empty and incomplete sequences as encoding errors, and some other clean-up. Bug 381412, r=jshin, sr=dveditz, b1.9=jst 2007-09-06 09:02:17 +04:00			`}`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`} else {`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`if (oddByte & 0x80) {`
			`// Accept legal 8-bit GB 2312-80 sequences in GB mode only`
			`NS_ASSERTION(HZ_ENCODING_STATE == HZ_STATE_GB,`
			`"Invalid lead byte in ASCII mode");`
			`*aDest++ = (UINT8_IN_RANGE(0x81, oddByte, 0xFE) &&`
			`UINT8_IN_RANGE(0x40, srcByte, 0xFE)) ?`
			`mUtil.GBKCharToUnicode(oddByte, srcByte) : UCS2_NO_MAPPING;`
			`mRunLength++;`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`iDestlen++;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`// otherwise, it is a 7-bit byte`
			`// The source will be an ASCII or a 7-bit HZ code depending on oddByte`
			`} else if (oddByte == HZLEAD1) { // if it is lead by '~'`
			`switch (srcByte) {`
			`case HZLEAD2:`
			`// we got a '~{'`
			`// we are switching to HZ state`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`mHZState = HZ_STATE_GB;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`mRunLength = 0;`
			`break;`

			`case HZLEAD3:`
			`// we got a '~}'`
			`// we are switching to ASCII state`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`mHZState = HZ_STATE_ASCII;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`if (mRunLength == 0) {`
			`*aDest++ = UCS2_NO_MAPPING;`
			`iDestlen++;`
			`}`
			`mRunLength = 0;`
			`break;`

			`case HZLEAD1:`
			`// we got a '~~', process like an ASCII, but no state change`
			`*aDest++ = CAST_CHAR_TO_UNICHAR(srcByte);`
			`iDestlen++;`
			`mRunLength++;`
			`break;`

			`default:`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`// Undefined ESC sequence '~X': treat as an error if X is a`
			`// printable character or we are in ASCII mode, and resynchronize`
			`// on the second character.`
			`//`
			`// N.B. For compatibility with other implementations, we treat '~\n'`
			`// as an illegal sequence even though RFC1843 permits it, and for`
			`// the same reason we pass through control characters including '\n'`
			`// and ' ' even in GB mode.`
			`if (srcByte > 0x20 \|\| HZ_ENCODING_STATE == HZ_STATE_ASCII) {`
			`*aDest++ = UCS2_NO_MAPPING;`
Fix off-by-one error in conversion pointers. Bug 801681, r=emk 2012-10-18 18:35:04 +04:00			`iDestlen++;`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`}`
			`aSrc--;`
			`(*aSrcLength)--;`
Fix off-by-one error in conversion pointers. Bug 801681, r=emk 2012-10-18 18:35:04 +04:00			`i--;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`break;`
			`}`
			`} else if (HZ_ENCODING_STATE == HZ_STATE_GB) {`
Various fixes to multi-byte Unicode decoders. Bug 715319, r=emk 2012-03-17 00:41:41 +04:00			`*aDest++ = (UINT8_IN_RANGE(0x21, oddByte, 0x7E) &&`
			`UINT8_IN_RANGE(0x21, srcByte, 0x7E)) ?`
			`mUtil.GBKCharToUnicode(oddByte\|0x80, srcByte\|0x80) :`
			`UCS2_NO_MAPPING;`
Treat all empty and incomplete sequences as encoding errors, and some other clean-up. Bug 381412, r=jshin, sr=dveditz, b1.9=jst 2007-09-06 09:02:17 +04:00			`mRunLength++;`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`iDestlen++;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`} else {`
			`NS_NOTREACHED("2-byte sequence that we don't know how to handle");`
			`*aDest++ = UCS2_NO_MAPPING;`
			`iDestlen++;`
			`}`
			`oddByte = 0;`
			`mHZState &= ~HZ_STATE_ODD_BYTE_FLAG;`
fix 80772. r=bstell clean up ucvcn and add gb18030 2001-05-15 16:52:29 +04:00			`}`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`} // for loop`
			`mOddByte = HZ_ODD_BYTE_STATE ? oddByte : 0;`
Correctly check for the HZ converter engine state. 1999-10-21 04:03:52 +04:00			`*aDestLength = iDestlen;`
Bug 494099, HZ-GB-2312 converter reads beyond input buffer and omits characters at block boundaries. r=VYV03354@nifty.ne.jp 2009-06-16 11:13:28 +04:00			`return res;`
check in HZ to Unicode contributed by Xu, Yueheng <yueheng.xu@intel.com> 1999-09-18 04:05:27 +04:00			`}`