b=184120 Add non-BMP char. support to UTF-32 converters.

r=smontague, sr=dbaron, a=asa
2003-01-31 23:26:20 +00:00 · 2003-01-31 23:26:20 +00:00 · 13b3fd479c
--- a/intl/uconv/macbuild/uconv.xml
+++ b/intl/uconv/macbuild/uconv.xml
@ -1859,14 +1859,7 @@
                </FILE>
                <FILE>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4LE.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                    <FILEKIND>Text</FILEKIND>
-                    <FILEFLAGS>Debug</FILEFLAGS>
-                </FILE>
-                <FILE>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4BE.cpp</PATH>
+                    <PATH>nsUnicodeToUTF32.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                    <FILEKIND>Text</FILEKIND>
                    <FILEFLAGS>Debug</FILEFLAGS>
@ -1901,14 +1894,7 @@
                </FILE>
                <FILE>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4LEToUnicode.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                    <FILEKIND>Text</FILEKIND>
-                    <FILEFLAGS>Debug</FILEFLAGS>
-                </FILE>
-                <FILE>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4BEToUnicode.cpp</PATH>
+                    <PATH>nsUTF32ToUnicode.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                    <FILEKIND>Text</FILEKIND>
                    <FILEFLAGS>Debug</FILEFLAGS>
@ -3175,12 +3161,7 @@
                </FILEREF>
                <FILEREF>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4LE.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4BE.cpp</PATH>
+                    <PATH>nsUnicodeToUTF32.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
                <FILEREF>
@ -3205,12 +3186,7 @@
                </FILEREF>
                <FILEREF>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4LEToUnicode.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4BEToUnicode.cpp</PATH>
+                    <PATH>nsUTF32ToUnicode.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
                <FILEREF>
@ -5491,14 +5467,7 @@
                </FILE>
                <FILE>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4LE.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                    <FILEKIND>Text</FILEKIND>
-                    <FILEFLAGS>Debug</FILEFLAGS>
-                </FILE>
-                <FILE>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4BE.cpp</PATH>
+                    <PATH>nsUnicodeToUTF32.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                    <FILEKIND>Text</FILEKIND>
                    <FILEFLAGS>Debug</FILEFLAGS>
@ -5533,14 +5502,7 @@
                </FILE>
                <FILE>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4LEToUnicode.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                    <FILEKIND>Text</FILEKIND>
-                    <FILEFLAGS>Debug</FILEFLAGS>
-                </FILE>
-                <FILE>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4BEToUnicode.cpp</PATH>
+                    <PATH>nsUTF32ToUnicode.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                    <FILEKIND>Text</FILEKIND>
                    <FILEFLAGS>Debug</FILEFLAGS>
@ -6807,12 +6769,7 @@
                </FILEREF>
                <FILEREF>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4LE.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4BE.cpp</PATH>
+                    <PATH>nsUnicodeToUTF32.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
                <FILEREF>
@ -6837,12 +6794,7 @@
                </FILEREF>
                <FILEREF>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4LEToUnicode.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4BEToUnicode.cpp</PATH>
+                    <PATH>nsUTF32ToUnicode.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
                <FILEREF>
@ -8374,13 +8326,7 @@
                <FILEREF>
                    <TARGETNAME>uconv.shlb</TARGETNAME>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4LEToUnicode.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <TARGETNAME>uconv.shlb</TARGETNAME>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUCS4BEToUnicode.cpp</PATH>
+                    <PATH>nsUTF32ToUnicode.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
            </GROUP>
@ -8412,13 +8358,7 @@
                <FILEREF>
                    <TARGETNAME>uconv.shlb</TARGETNAME>
                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4LE.cpp</PATH>
-                    <PATHFORMAT>MacOS</PATHFORMAT>
-                </FILEREF>
-                <FILEREF>
-                    <TARGETNAME>uconv.shlb</TARGETNAME>
-                    <PATHTYPE>Name</PATHTYPE>
-                    <PATH>nsUnicodeToUCS4BE.cpp</PATH>
+                    <PATH>nsUnicodeToUTF32.cpp</PATH>
                    <PATHFORMAT>MacOS</PATHFORMAT>
                </FILEREF>
            </GROUP>
--- a/intl/uconv/src/nsUConvModule.cpp
+++ b/intl/uconv/src/nsUConvModule.cpp
@ -126,8 +126,7 @@
 #include "nsVPSToUnicode.h"
 #include "nsUTF7ToUnicode.h"
 #include "nsMUTF7ToUnicode.h"
-#include "nsUCS4BEToUnicode.h"
-#include "nsUCS4LEToUnicode.h"
+#include "nsUTF32ToUnicode.h"
 #include "nsUCS2BEToUnicode.h"
 #include "nsUCS2LEToUnicode.h"
 #include "nsT61ToUnicode.h"
@ -180,8 +179,7 @@
 #include "nsUnicodeToMUTF7.h"
 #include "nsUnicodeToUCS2BE.h"
 #include "nsUnicodeToUCS2LE.h"
-#include "nsUnicodeToUCS4BE.h"
-#include "nsUnicodeToUCS4LE.h"
+#include "nsUnicodeToUTF32.h"
 #include "nsUnicodeToT61.h"
 #include "nsUnicodeToUserDefined.h"
 #include "nsUnicodeToSymbol.h"
@ -437,8 +435,8 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF7ToUnicode);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsMUTF7ToUnicode);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16BEToUnicode);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF16LEToUnicode);
-NS_GENERIC_FACTORY_CONSTRUCTOR(nsUCS4BEToUnicode);
-NS_GENERIC_FACTORY_CONSTRUCTOR(nsUCS4LEToUnicode);
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32BEToUnicode);
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsUTF32LEToUnicode);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUEscape);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUEscapeToUnicode);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF7);
@ -446,8 +444,8 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToMUTF7);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16BE);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16LE);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF16);
-NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUCS4BE);
-NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUCS4LE);
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32BE);
+NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToUTF32LE);
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsUnicodeToLangBoxArabic8);

 // ucvibm
@ -958,12 +956,12 @@ static const nsModuleComponentInfo components[] =
  { 
    DECODER_NAME_BASE "UTF-32BE" , NS_UTF32BETOUNICODE_CID, 
    NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32BE",
-    nsUCS4BEToUnicodeConstructor ,
+    nsUTF32BEToUnicodeConstructor ,
  },
  { 
    DECODER_NAME_BASE "UTF-32LE" , NS_UTF32LETOUNICODE_CID, 
    NS_UNICODEDECODER_CONTRACTID_BASE "UTF-32LE",
-    nsUCS4LEToUnicodeConstructor ,
+    nsUTF32LEToUnicodeConstructor ,
  },
  { 
    DECODER_NAME_BASE "T.61-8bit" , NS_T61TOUNICODE_CID, 
@ -1258,12 +1256,12 @@ static const nsModuleComponentInfo components[] =
  { 
    ENCODER_NAME_BASE "UTF-32BE" , NS_UNICODETOUTF32BE_CID, 
    NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32BE",
-    nsUnicodeToUCS4BEConstructor, 
+    nsUnicodeToUTF32BEConstructor, 
  },
  { 
    ENCODER_NAME_BASE "UTF-32LE" , NS_UNICODETOUTF32LE_CID, 
    NS_UNICODEENCODER_CONTRACTID_BASE "UTF-32LE",
-    nsUnicodeToUCS4LEConstructor, 
+    nsUnicodeToUTF32LEConstructor, 
  },
    { 
    ENCODER_NAME_BASE "T.61-8bit" , NS_UNICODETOT61_CID, 
--- a/intl/uconv/ucvlatin/Makefile.in
+++ b/intl/uconv/ucvlatin/Makefile.in
@ -93,8 +93,7 @@ CPPSRCS		= \
 		nsMUTF7ToUnicode.cpp \
 		nsUCS2BEToUnicode.cpp \
 		nsUCS2LEToUnicode.cpp \
-		nsUCS4BEToUnicode.cpp \
-		nsUCS4LEToUnicode.cpp \
+		nsUTF32ToUnicode.cpp \
 		nsT61ToUnicode.cpp \
 		nsUserDefinedToUnicode.cpp \
 		nsUnicodeToUEscape.cpp \
@ -155,8 +154,7 @@ CPPSRCS		= \
 		nsUnicodeToMUTF7.cpp \
 		nsUnicodeToUCS2BE.cpp \
 		nsUnicodeToUCS2LE.cpp \
-		nsUnicodeToUCS4BE.cpp \
-		nsUnicodeToUCS4LE.cpp \
+		nsUnicodeToUTF32.cpp \
 		nsUnicodeToT61.cpp \
 		nsUnicodeToUserDefined.cpp \
 		nsUnicodeToSymbol.cpp \
--- a/intl/uconv/ucvlatin/nsUCS4BEToUnicode.cpp
+++ b/intl/uconv/ucvlatin/nsUCS4BEToUnicode.cpp
--- a/intl/uconv/ucvlatin/nsUCS4BEToUnicode.h
+++ b/intl/uconv/ucvlatin/nsUCS4BEToUnicode.h
--- a/intl/uconv/ucvlatin/nsUCS4LEToUnicode.cpp
+++ b/intl/uconv/ucvlatin/nsUCS4LEToUnicode.cpp
--- a/intl/uconv/ucvlatin/nsUCS4LEToUnicode.h
+++ b/intl/uconv/ucvlatin/nsUCS4LEToUnicode.h
--- a/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp
+++ b/intl/uconv/ucvlatin/nsUTF32ToUnicode.cpp
@ -0,0 +1,237 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:expandtab:shiftwidth=2:tabstop=2: 
+ */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: NPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is 
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s): Jungshik Shin <jshin@mailaps.org>
+ *
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the NPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the NPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nsUCSupport.h"
+#include "nsUTF32ToUnicode.h"
+#include <string.h>
+
+//----------------------------------------------------------------------
+// static functions and macro definition common to nsUTF32(BE|LE)ToUnicode
+
+// Both macros assemble the 32-bit value one byte at a time.  That gives the
+// same result on big- and little-endian hosts and never reads through a
+// PRUint32*: ConvertCommon() applies them to a plain char buffer at whatever
+// offset the previous sequence ended, so 4-byte alignment of (s) cannot be
+// assumed.  Each byte is widened to PRUint32 before shifting so that the
+// top byte is never shifted into the sign bit of an int.
+
+// Read the four bytes at (s) as a little-endian 32-bit value
+// (least significant byte first).
+#define LE_STRING_TO_UCS4(s)                                       \
+        (PRUint32(PRUint8(*(s)))               |                   \
+         (PRUint32(PRUint8(*((s) + 1))) << 8)  |                   \
+         (PRUint32(PRUint8(*((s) + 2))) << 16) |                   \
+         (PRUint32(PRUint8(*((s) + 3))) << 24))
+
+// Read the four bytes at (s) as a big-endian 32-bit value
+// (most significant byte first).
+#define BE_STRING_TO_UCS4(s)                                       \
+        (PRUint32(PRUint8(*((s) + 3)))         |                   \
+         (PRUint32(PRUint8(*((s) + 2))) << 8)  |                   \
+         (PRUint32(PRUint8(*((s) + 1))) << 16) |                   \
+         (PRUint32(PRUint8(*(s))) << 24))
+ 
+/**
+ * Decoding loop shared by the UTF-32BE and UTF-32LE decoders.
+ *
+ * @param aSrc        [IN] UTF-32 byte stream
+ * @param aSrcLength  [IN/OUT] number of bytes available; on return the number
+ *                    of bytes consumed (left untouched when the input was
+ *                    too short to complete the pending sequence and was
+ *                    only buffered)
+ * @param aDest       [OUT] UTF-16 output buffer
+ * @param aDestLength [IN/OUT] capacity in PRUnichars (must be > 0); on return
+ *                    the number of PRUnichars written
+ * @param aState      [IN/OUT] number of bytes still needed to complete the
+ *                    4-byte sequence left unfinished by the previous call
+ *                    (0 to 3)
+ * @param aBuffer     [IN/OUT] 4-byte scratch area holding that unfinished
+ *                    sequence
+ * @param aIsLE       [IN] PR_TRUE to read little-endian, PR_FALSE big-endian
+ * @return NS_OK, NS_OK_UDEC_MOREINPUT when the input ends inside a sequence,
+ *         NS_OK_UDEC_MOREOUTPUT when aDest has no room for the next
+ *         character, or NS_ERROR_INVALID_ARG for a bad *aState or an empty
+ *         aDest
+ */
+static nsresult ConvertCommon(const char * aSrc,
+                              PRInt32 * aSrcLength,
+                              PRUnichar * aDest,
+                              PRInt32 * aDestLength,
+                              PRUint16 * aState,
+                              PRUint8  * aBuffer,
+                              PRBool aIsLE)
+{
+
+  NS_ENSURE_TRUE(*aState < 4, NS_ERROR_INVALID_ARG);
+  NS_ENSURE_TRUE(*aDestLength > 0, NS_ERROR_INVALID_ARG);
+
+  const char *src = aSrc;
+  const char *srcEnd = aSrc + *aSrcLength;
+
+  PRUnichar *dest = aDest;
+  PRUnichar *destEnd = aDest + *aDestLength;
+
+  // Even with this input the pending sequence stays incomplete: buffer
+  // everything and ask for more.
+  if (*aState > *aSrcLength)
+  {
+    memcpy(aBuffer + 4 - *aState, src, *aSrcLength);
+    *aDestLength = 0;
+    *aState -= *aSrcLength;
+    return NS_OK_UDEC_MOREINPUT;
+  }
+
+  PRUint32 ucs4;
+
+  // prev. run left a partial UTF-32 seq.
+  if (*aState > 0)
+  {
+    memcpy(aBuffer + 4 - *aState, src, *aState);
+    ucs4 =  aIsLE ? LE_STRING_TO_UCS4(aBuffer) : BE_STRING_TO_UCS4(aBuffer);
+    if (ucs4 < 0x10000L)  // BMP
+    {
+      // XXX Do we have to convert surrogate code points to the replacement
+      // character (0xfffd)?
+      *dest++= PRUnichar(ucs4);
+    }
+    else if (ucs4 < 0x110000L)  // plane 1 through plane 16
+    {
+      if (destEnd - dest < 2)
+      {
+        // Nothing consumed, nothing written; *aState is left as is so the
+        // same bytes are re-read on the next call.
+        *aSrcLength = 0;
+        *aDestLength = 0;
+        return NS_OK_UDEC_MOREOUTPUT;
+      }
+      // (ucs4 >> 10) + 0xd7c0 == ((ucs4 - 0x10000) >> 10) + 0xd800
+      *dest++= PRUnichar((ucs4 >> 10) + 0xd7c0);      // high surrogate
+      *dest++= PRUnichar((ucs4 & 0x3ffL) | 0xdc00);   // low surrogate
+    }
+    // Codepoints in plane 17 and higher (> 0x10ffff)
+    // are not representable in UTF-16 we use for the internal
+    // character representation. This is not a problem
+    // because Unicode/ISO 10646 will never assign characters
+    // in plane 17 and higher. Therefore, we convert them
+    // to Unicode replacement character (0xfffd).
+    else
+      *dest++ = 0xfffd;
+    src += *aState;
+    *aState = 0;
+  }
+
+  nsresult rv = NS_OK;  // conversion result
+
+  for ( ; src < srcEnd && dest < destEnd; src += 4)
+  {
+    if (srcEnd - src < 4)
+    {
+      // fill up aBuffer until src buffer gets exhausted.
+      memcpy(aBuffer, src, srcEnd - src);
+      *aState = 4 - (srcEnd - src); // set add. char to read in next run
+      src = srcEnd;
+      rv = NS_OK_UDEC_MOREINPUT;
+      break;
+    }
+
+    ucs4 =  aIsLE ? LE_STRING_TO_UCS4(src) : BE_STRING_TO_UCS4(src);
+    if (ucs4 < 0x10000L)  // BMP
+    {
+      // XXX Do we have to convert surrogate code points to the replacement
+      // character (0xfffd)?
+      *dest++= PRUnichar(ucs4);
+    }
+    else if (ucs4 < 0x110000L)  // plane 1 through plane 16
+    {
+      if (destEnd - dest < 2)
+      {
+        // Only one slot is left but a surrogate pair needs two.  Report it
+        // explicitly: dest < destEnd here, so the check after the loop
+        // would otherwise let NS_OK through with input still unconsumed.
+        rv = NS_OK_UDEC_MOREOUTPUT;
+        break;
+      }
+      // (ucs4 >> 10) + 0xd7c0 == ((ucs4 - 0x10000) >> 10) + 0xd800
+      *dest++= PRUnichar((ucs4 >> 10) + 0xd7c0);      // high surrogate
+      *dest++= PRUnichar((ucs4 & 0x3ffL) | 0xdc00);   // low surrogate
+    }
+    else                       // plane 17 and higher
+      *dest++ = 0xfffd;
+  }
+
+  //output not finished, output buffer too short
+  if((NS_OK == rv) && (src < srcEnd) && (dest >= destEnd))
+    rv = NS_OK_UDEC_MOREOUTPUT;
+
+  *aSrcLength = src - aSrc;
+  *aDestLength  = dest - aDest;
+
+  return rv;
+}
+
+
+//----------------------------------------------------------------------
+// Class nsUTF32ToUnicode [implementation]
+
+// Start out with no partially read UTF-32 sequence pending.
+nsUTF32ToUnicode::nsUTF32ToUnicode()
+  : nsBasicDecoderSupport()
+{
+  this->Reset();
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsDecoderSupport class [implementation]
+
+/**
+ * Upper bound on the number of PRUnichars Convert() can produce for
+ * aSrcLength more bytes of input.
+ *
+ * @param aSrc        [IN] input bytes (not inspected)
+ * @param aSrcLength  [IN] number of input bytes
+ * @param aDestLength [OUT] worst-case output length in PRUnichars
+ * @return NS_OK
+ */
+NS_IMETHODIMP nsUTF32ToUnicode::GetMaxLength(const char * aSrc,
+                                            PRInt32 aSrcLength,
+                                            PRInt32 * aDestLength)
+{
+  // Every 4-byte sequence yields at most two PRUnichars (a surrogate pair
+  // for a non-BMP character).  Up to 3 bytes of an unfinished sequence may
+  // already be buffered from the previous Convert() call, so round the
+  // sequence count up instead of down: a few new bytes can complete a
+  // buffered sequence.  Rounding up also keeps the estimate non-zero for
+  // any non-empty input, which Convert() requires of its output buffer.
+  *aDestLength = ((aSrcLength + 3) / 4) * 2;
+  return NS_OK;
+}
+
+
+//----------------------------------------------------------------------
+// Subclassing of nsBasicDecoderSupport class [implementation]
+
+// Forget any partially read sequence: empty the 4-byte holding buffer and
+// mark that no further bytes are awaited.
+NS_IMETHODIMP nsUTF32ToUnicode::Reset()
+{
+  memset(mBufferInc, 0, sizeof(mBufferInc));
+  // the number of additional bytes to read to complete UTF-32 4byte seq.
+  mState = 0;
+
+  return NS_OK;
+}
+
+
+//----------------------------------------------------------------------
+// Class nsUTF32BEToUnicode [implementation]
+
+//----------------------------------------------------------------------
+// Subclassing of nsUTF32ToUnicode class [implementation]
+
+// Big-endian flavour: hand everything to the shared worker together with
+// this decoder's pending-sequence state.
+NS_IMETHODIMP nsUTF32BEToUnicode::Convert(const char * aSrc,
+                                          PRInt32 * aSrcLength,
+                                          PRUnichar * aDest,
+                                          PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_FALSE;
+  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
+                              &mState, mBufferInc, isLittleEndian);
+  return rv;
+}
+
+//----------------------------------------------------------------------
+// Class nsUTF32LEToUnicode [implementation]
+  
+//----------------------------------------------------------------------
+// Subclassing of nsUTF32ToUnicode class [implementation]
+
+// Little-endian flavour: hand everything to the shared worker together with
+// this decoder's pending-sequence state.
+NS_IMETHODIMP nsUTF32LEToUnicode::Convert(const char * aSrc,
+                                          PRInt32 * aSrcLength,
+                                          PRUnichar * aDest,
+                                          PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_TRUE;
+  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
+                              &mState, mBufferInc, isLittleEndian);
+  return rv;
+}
+
+// XXX: What should happen to a partial sequence still held in mBufferInc
+// when the input ends?  Presumably that would have to be handled in Finish().
+  
--- a/intl/uconv/ucvlatin/nsUTF32ToUnicode.h
+++ b/intl/uconv/ucvlatin/nsUTF32ToUnicode.h
@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:expandtab:shiftwidth=2:tabstop=2:
+ */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: NPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is 
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s): Jungshik Shin <jshin@mailaps.org>
+ *
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the NPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the NPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsUTF32ToUnicode_h___
+#define nsUTF32ToUnicode_h___
+
+//----------------------------------------------------------------------
+// Class nsUTF32ToUnicode [declaration]  
+
+/**
+ * A character set converter from UTF-32 to Unicode (UTF-16 PRUnichars).
+ * The base class for the UTF-32BE/UTF-32LE to Unicode converters: it owns
+ * the state for a 4-byte sequence split across two Convert() calls, while
+ * the subclasses supply Convert() for their byte order.
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUTF32ToUnicode : public nsBasicDecoderSupport
+{
+
+public:
+
+  /**
+   * Class constructor. Calls Reset() so that no partial sequence is pending.
+   */
+  nsUTF32ToUnicode();
+
+protected:
+
+  // The number of additional bytes to read to complete an incomplete
+  // UTF-32 4-byte sequence (0 when nothing is pending).
+  PRUint16 mState;  
+  // Buffer for an incomplete UTF-32 sequence carried over between
+  // Convert() calls.
+  PRUint8  mBufferInc[4];
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsBasicDecoderSupport class [declaration]
+
+  // Reports in *aDestLength an estimate of the number of PRUnichars needed
+  // to decode aSrcLength bytes; aSrc itself is not examined.
+  NS_IMETHOD GetMaxLength(const char * aSrc, PRInt32 aSrcLength, 
+                          PRInt32 * aDestLength);
+
+  // Clears mState and mBufferInc, dropping any partial sequence.
+  NS_IMETHOD Reset();
+
+};
+
+//----------------------------------------------------------------------
+// Class nsUTF32BEToUnicode [declaration]  
+
+/**
+ * A character set converter from UTF-32BE (big-endian UTF-32) to Unicode.
+ * A subclass of nsUTF32ToUnicode that only adds Convert().
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUTF32BEToUnicode : public nsUTF32ToUnicode
+{
+public:
+
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsBasicDecoderSupport class [declaration]
+
+  // Decodes big-endian UTF-32 bytes from aSrc into PRUnichars in aDest.
+  // *aSrcLength and *aDestLength carry the available sizes in and the
+  // consumed/written counts out.
+  NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength, 
+                     PRUnichar * aDest, PRInt32 * aDestLength);
+
+
+};
+
+//----------------------------------------------------------------------
+// Class nsUTF32LEToUnicode [declaration]  
+
+/**
+ * A character set converter from UTF-32LE (little-endian UTF-32) to Unicode.
+ * A subclass of nsUTF32ToUnicode that only adds Convert().
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUTF32LEToUnicode : public nsUTF32ToUnicode
+{
+public:
+
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsBasicDecoderSupport class [declaration]
+
+  // Decodes little-endian UTF-32 bytes from aSrc into PRUnichars in aDest.
+  // *aSrcLength and *aDestLength carry the available sizes in and the
+  // consumed/written counts out.
+  NS_IMETHOD Convert(const char * aSrc, PRInt32 * aSrcLength, 
+                     PRUnichar * aDest, PRInt32 * aDestLength);
+
+};
+
+#endif /* nsUTF32ToUnicode_h___ */
+
--- a/intl/uconv/ucvlatin/nsUnicodeToUCS4BE.cpp
+++ b/intl/uconv/ucvlatin/nsUnicodeToUCS4BE.cpp
--- a/intl/uconv/ucvlatin/nsUnicodeToUCS4BE.h
+++ b/intl/uconv/ucvlatin/nsUnicodeToUCS4BE.h
--- a/intl/uconv/ucvlatin/nsUnicodeToUCS4LE.cpp
+++ b/intl/uconv/ucvlatin/nsUnicodeToUCS4LE.cpp
--- a/intl/uconv/ucvlatin/nsUnicodeToUCS4LE.h
+++ b/intl/uconv/ucvlatin/nsUnicodeToUCS4LE.h
--- a/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp
+++ b/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp
@ -0,0 +1,262 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:expandtab:shiftwidth=2:tabstop=2: 
+ */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: NPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is 
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s): Jungshik Shin <jshin@mailaps.org>
+ *
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the NPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the NPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include <string.h>
+#include "nsUCSupport.h"
+#include "nsUnicodeToUTF32.h"
+
+// Both macros store the 32-bit value (u) one byte at a time.  That gives
+// the same bytes on big- and little-endian hosts and never writes through
+// a PRUint32*: the destination is a plain char buffer supplied by the
+// caller, so 4-byte alignment of (s) cannot be assumed.
+
+// Store (u) at (s) in little-endian order (least significant byte first).
+#define UCS4_TO_LE_STRING(u, s)             \
+  PR_BEGIN_MACRO                            \
+    (s)[0] = PRUint8((u) & 0xffL);          \
+    (s)[1] = PRUint8(((u) >> 8) & 0xffL);   \
+    (s)[2] = PRUint8(((u) >> 16) & 0xffL);  \
+    (s)[3] = PRUint8(((u) >> 24) & 0xffL);  \
+  PR_END_MACRO
+
+// Store (u) at (s) in big-endian order (most significant byte first).
+#define UCS4_TO_BE_STRING(u, s)             \
+  PR_BEGIN_MACRO                            \
+    (s)[0] = PRUint8(((u) >> 24) & 0xffL);  \
+    (s)[1] = PRUint8(((u) >> 16) & 0xffL);  \
+    (s)[2] = PRUint8(((u) >> 8) & 0xffL);   \
+    (s)[3] = PRUint8((u) & 0xffL);          \
+  PR_END_MACRO
+
+//----------------------------------------------------------------------
+// Static functions common to nsUnicodeToUTF32LE and nsUnicodeToUTF32BE
+ 
+/**
+ * Encoding loop shared by the UTF-32BE and UTF-32LE encoders.
+ *
+ * @param aSrc           [IN] UTF-16 input
+ * @param aSrcLength     [IN/OUT] number of PRUnichars available; rewritten
+ *                       with the number consumed only when the output
+ *                       buffer runs out, otherwise left untouched (all
+ *                       input was consumed)
+ * @param aDest          [OUT] output byte buffer
+ * @param aDestLength    [IN/OUT] capacity in bytes; on return the number of
+ *                       bytes written
+ * @param aHighSurrogate [IN/OUT] high surrogate that ended the previous
+ *                       call's input, or 0 when none is pending
+ * @param aIsLE          [IN] PR_TRUE to write little-endian, PR_FALSE for
+ *                       big-endian
+ * @return NS_OK, NS_OK_UENC_MOREINPUT when the input ends with a high
+ *         surrogate, or NS_OK_UENC_MOREOUTPUT when aDest is too small
+ */
+static nsresult ConvertCommon(const PRUnichar * aSrc,
+                              PRInt32 * aSrcLength,
+                              char * aDest,
+                              PRInt32 * aDestLength,
+                              PRUnichar * aHighSurrogate,
+                              PRBool aIsLE)
+{
+  const PRUnichar * src = aSrc;
+  const PRUnichar * srcEnd = aSrc + *aSrcLength;
+  char * dest = aDest;
+  const char * destEnd = aDest + *aDestLength;
+  PRUint32 ucs4;
+
+
+  // left-over high surrogate code point from the prev. run.
+  if (*aHighSurrogate)
+  {
+    if (! *aSrcLength)
+    {
+      *aDestLength = 0;
+      return NS_OK_UENC_MOREINPUT;
+    }
+    if (*aDestLength < 4)
+    {
+      *aSrcLength = 0;
+      *aDestLength = 0;
+      return NS_OK_UENC_MOREOUTPUT;
+    }
+    if ((*src & 0xfc00) != 0xdc00)
+    {
+      // Not a low surrogate codepoint, so the pending high surrogate is
+      // unpaired: emit it alone.  *src is NOT consumed here; the loop
+      // below encodes it as a character in its own right.
+      ucs4 = PRUint32(*aHighSurrogate);
+    }
+    else
+    {
+      // The pair is complete: combine it and consume the low surrogate.
+      ucs4 = (((*aHighSurrogate & 0x3ffL) << 10) | (*src & 0x3ffL)) + 0x10000;
+      ++src;
+    }
+
+    if (aIsLE)
+      UCS4_TO_LE_STRING(ucs4, dest);
+    else
+      UCS4_TO_BE_STRING(ucs4, dest);
+    dest += 4;
+    *aHighSurrogate = 0;
+  }
+
+  while (src < srcEnd) {
+    // regular codepoint or an unpaired low surrogate
+    if ((src[0] & 0xfc00) != 0xd800)
+    {
+      if (destEnd - dest < 4)
+        goto error_more_output;
+      ucs4 = PRUint32(src[0]);
+    }
+    else  // high surrogate
+    {
+      if ((src+1) >= srcEnd) {
+        // we need another surrogate to complete this unicode char;
+        // remember it for the next call (*aSrcLength stays as passed in,
+        // i.e. the whole input counts as consumed)
+        *aHighSurrogate = src[0];
+        *aDestLength = dest - aDest;
+        return NS_OK_UENC_MOREINPUT;
+      }
+      //handle surrogate
+      if (destEnd - dest < 4)
+        goto error_more_output;
+      if ((src[1] & 0xfc00) != 0xdc00)  // unpaired
+        ucs4 = PRUint32(src[0]);
+      else
+      {  // convert surrogate pair to UCS4
+        ucs4 = (((src[0] & 0x3ffL) << 10) | (src[1] & 0x3ffL)) + 0x10000;
+        *aHighSurrogate = 0;
+        ++src;
+      }
+    }
+    if (aIsLE)
+      UCS4_TO_LE_STRING(ucs4, dest);
+    else
+      UCS4_TO_BE_STRING(ucs4, dest);
+    dest += 4;
+    ++src;
+  }
+
+  *aDestLength = dest - aDest;
+  return NS_OK;
+
+error_more_output:
+  // report how far we got so the caller can resume from there
+  *aSrcLength = src - aSrc;
+  *aDestLength = dest - aDest;
+  return NS_OK_UENC_MOREOUTPUT;
+
+}
+
+// Flush step shared by both encoders: if the input ended on a high
+// surrogate that never got its partner, write that code unit out by itself
+// as one 4-byte value.  *aDestLength is capacity in, bytes written out.
+static nsresult FinishCommon(char * aDest,
+                             PRInt32 * aDestLength,
+                             PRUnichar * aHighSurrogate,
+                             PRBool aIsLE)
+{
+  // nothing pending, nothing to write
+  if (!*aHighSurrogate) {
+    *aDestLength = 0;
+    return NS_OK;
+  }
+
+  // one UTF-32 unit needs four bytes of room
+  if (*aDestLength < 4) {
+    *aDestLength = 0;
+    return NS_OK_UENC_MOREOUTPUT;
+  }
+
+  char * out = aDest;
+  PRUint32 pending = PRUint32(*aHighSurrogate);
+  if (aIsLE)
+    UCS4_TO_LE_STRING(pending, out);
+  else
+    UCS4_TO_BE_STRING(pending, out);
+
+  *aDestLength = 4;
+  *aHighSurrogate = 0;
+  return NS_OK;
+}
+
+
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32 [implementation]
+
+// nsISupports boilerplate for the common encoder base class, naming
+// nsIUnicodeEncoder as its one interface.
+// NOTE(review): presumably expands to AddRef/Release/QueryInterface --
+// confirm against the macro's definition.
+NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32, nsIUnicodeEncoder);
+
+
+//----------------------------------------------------------------------
+// Subclassing of nsIUnicodeEncoder class [implementation]
+
+// Worst-case output size in bytes: one 4-byte UTF-32 unit for every UTF-16
+// code unit of input.  aSrc itself is not examined.
+NS_IMETHODIMP nsUnicodeToUTF32::GetMaxLength(const PRUnichar * aSrc,
+                                             PRInt32 aSrcLength,
+                                             PRInt32 * aDestLength)
+{
+  const PRInt32 bytesNeeded = aSrcLength * 4;
+  *aDestLength = bytesNeeded;
+  return NS_OK;
+}
+
+// Sets every bit in the first 0x10000 bits of aInfo (one bit per 16-bit
+// code unit value).
+NS_IMETHODIMP nsUnicodeToUTF32::FillInfo(PRUint32 *aInfo)
+{
+  // 0x10000 bits, eight to a byte
+  const size_t bitmapBytes = (0x10000L >> 3);
+  memset(aInfo, 0xFF, bitmapBytes);
+  return NS_OK;
+}
+
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32BE [implementation]
+
+//----------------------------------------------------------------------
+// Subclassing of nsUnicodeToUTF32 class [implementation]
+  
+
+// Big-endian flavour: defer to the shared worker, passing along this
+// encoder's pending high surrogate.
+NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc,
+                                          PRInt32 * aSrcLength,
+                                          char * aDest,
+                                          PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_FALSE;
+  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
+                              &mHighSurrogate, isLittleEndian);
+  return rv;
+}
+
+// Big-endian flavour: flush a still-pending high surrogate, if any.
+NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest,
+                                         PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_FALSE;
+  nsresult rv = FinishCommon(aDest, aDestLength, &mHighSurrogate,
+                             isLittleEndian);
+  return rv;
+}
+
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32LE [implementation]
+  
+//----------------------------------------------------------------------
+// Subclassing of nsUnicodeToUTF32 class [implementation]
+
+
+// Little-endian flavour: defer to the shared worker, passing along this
+// encoder's pending high surrogate.
+NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc,
+                                          PRInt32 * aSrcLength,
+                                          char * aDest,
+                                          PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_TRUE;
+  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
+                              &mHighSurrogate, isLittleEndian);
+  return rv;
+}
+
+// Little-endian flavour: flush a still-pending high surrogate, if any.
+NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest,
+                                         PRInt32 * aDestLength)
+{
+  const PRBool isLittleEndian = PR_TRUE;
+  nsresult rv = FinishCommon(aDest, aDestLength, &mHighSurrogate,
+                             isLittleEndian);
+  return rv;
+}
+
--- a/intl/uconv/ucvlatin/nsUnicodeToUTF32.h
+++ b/intl/uconv/ucvlatin/nsUnicodeToUTF32.h
@ -0,0 +1,131 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:expandtab:shiftwidth=2:tabstop=2:
+ */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: NPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Netscape Public License
+ * Version 1.1 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/NPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Communicator client code.
+ *
+ * The Initial Developer of the Original Code is 
+ * Netscape Communications Corporation.
+ * Portions created by the Initial Developer are Copyright (C) 1998
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s): Jungshik Shin <jshin@mailaps.org>
+ *
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the NPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the NPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef nsUnicodeToUTF32_h___
+#define nsUnicodeToUTF32_h___
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32 [declaration]  
+
+/**
+ * A character set converter from Unicode to UTF32.
+ * The base class for Unicode to UTF32BE/UTF32LE converters.
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUnicodeToUTF32 : public nsIUnicodeEncoder
+{ // Declares no Convert()/Finish() itself; the nsUnicodeToUTF32BE and nsUnicodeToUTF32LE subclasses declare those.
+   NS_DECL_ISUPPORTS
+
+public:
+
+  /**
+   * Class constructor. Starts with mHighSurrogate cleared to 0.
+   */
+  nsUnicodeToUTF32() {mHighSurrogate = 0;};
+  virtual ~nsUnicodeToUTF32() {};
+
+protected:
+  PRUnichar  mHighSurrogate; // zeroed by the constructor and Reset(); presumably a high surrogate carried between Convert() calls -- TODO confirm
+
+  NS_IMETHOD GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength, 
+                          PRInt32 * aDestLength);
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsIUnicodeEncoder class [declaration]
+
+  NS_IMETHOD Reset() {mHighSurrogate = 0; return NS_OK;}; // clears mHighSurrogate; always returns NS_OK
+  NS_IMETHOD FillInfo(PRUint32* aInfo);
+  NS_IMETHOD SetOutputErrorBehavior(PRInt32 aBehavior, 
+                                    nsIUnicharEncoder * aEncoder, 
+                                    PRUnichar aChar) 
+                                    {return NS_OK;}; // no-op: all three arguments are ignored and NS_OK is returned
+
+};
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32BE [declaration]  
+
+/**
+ * A character set converter from Unicode to UTF32BE.
+ * A subclass of nsUnicodeToUTF32.
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUnicodeToUTF32BE : public nsUnicodeToUTF32
+{ // Adds only Convert() and Finish(); everything else is inherited from nsUnicodeToUTF32.
+public:
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsIUnicodeEncoder class [declaration]
+
+  NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength, 
+                     char * aDest, PRInt32 * aDestLength); // defined out of line; delegates to ConvertCommon with PR_FALSE as the final argument
+  NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength); // defined out of line; delegates to FinishCommon with PR_FALSE as the final argument
+
+
+};
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF32LE [declaration]  
+
+/**
+ * A character set converter from Unicode to UTF32LE.
+ * A subclass of nsUnicodeToUTF32.
+ * @created         08/Dec/2002
+ * @author  Jungshik Shin
+ */
+
+class nsUnicodeToUTF32LE : public nsUnicodeToUTF32
+{ // Adds only Convert() and Finish(); everything else is inherited from nsUnicodeToUTF32.
+public:
+
+  //--------------------------------------------------------------------
+  // Subclassing of nsIUnicodeEncoder class [declaration]
+  NS_IMETHOD Convert(const PRUnichar * aSrc, PRInt32 * aSrcLength, 
+                     char * aDest, PRInt32 * aDestLength); // defined out of line; delegates to ConvertCommon with PR_TRUE as the final argument
+  NS_IMETHOD Finish(char * aDest, PRInt32 * aDestLength); // defined out of line; delegates to FinishCommon with PR_TRUE as the final argument
+
+};
+
+#endif /* nsUnicodeToUTF32_h___ */
+