pjs/intl/uconv/ucvlatin/nsUnicodeToUTF32.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
 */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: NPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Netscape Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s): Jungshik Shin <jshin@mailaps.org>
 *
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the NPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the NPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include <string.h>
#include "nsUCSupport.h"
#include "nsUnicodeToUTF32.h"

// Stores the 32-bit value u into the four bytes starting at s, least
// significant byte first (UTF-32LE order).
//
// The bytes are written one at a time instead of through a PRUint32* cast:
// the destination is a plain char buffer that is not guaranteed to be
// 4-byte aligned, and the shift/mask form yields the same byte sequence on
// every host byte order, so no IS_BIG_ENDIAN switch is needed.
//
// Both arguments are evaluated more than once; pass side-effect-free
// expressions only.
#define UCS4_TO_LE_STRING(u, s)             \
  PR_BEGIN_MACRO                            \
    (s)[3] = PRUint8(((u) >> 24) & 0xffL);  \
    (s)[2] = PRUint8(((u) >> 16) & 0xffL);  \
    (s)[1] = PRUint8(((u) >> 8) & 0xffL);   \
    (s)[0] = PRUint8((u) & 0xffL);          \
  PR_END_MACRO

// Stores the 32-bit value u into the four bytes starting at s, most
// significant byte first (UTF-32BE order).
//
// The bytes are written one at a time instead of through a PRUint32* cast:
// the destination is a plain char buffer that is not guaranteed to be
// 4-byte aligned, and the shift/mask form yields the same byte sequence on
// every host byte order, so no IS_BIG_ENDIAN switch is needed.
//
// Both arguments are evaluated more than once; pass side-effect-free
// expressions only.
#define UCS4_TO_BE_STRING(u, s)             \
  PR_BEGIN_MACRO                            \
    (s)[0] = PRUint8(((u) >> 24) & 0xffL);  \
    (s)[1] = PRUint8(((u) >> 16) & 0xffL);  \
    (s)[2] = PRUint8(((u) >> 8) & 0xffL);   \
    (s)[3] = PRUint8((u) & 0xffL);          \
  PR_END_MACRO

//----------------------------------------------------------------------
// Static functions common to nsUnicodeToUTF32LE and nsUnicodeToUTF32BE

// Encodes a run of UTF-16 code units as UTF-32, four output bytes per
// code point.
//
//   aSrc            input code units.
//   aSrcLength      in: number of code units at aSrc.
//                   out: rewritten to the number consumed only when
//                   NS_OK_UENC_MOREOUTPUT is returned; left untouched
//                   otherwise.
//   aDest           output buffer.
//   aDestLength     in: capacity of aDest in bytes.
//                   out: number of bytes written.
//   aHighSurrogate  carry state shared across calls: holds a high surrogate
//                   that ended the previous input run, or 0 if none.
//   aIsLE           nonzero selects UCS4_TO_LE_STRING, zero selects
//                   UCS4_TO_BE_STRING.
//
// Surrogate pairs are combined into a single code point. A surrogate that
// is not part of a well-formed pair is written out unchanged as its own
// 32-bit value.
//
// Returns NS_OK when all input was converted, NS_OK_UENC_MOREINPUT when the
// input ended on a high surrogate (now stored in *aHighSurrogate) or when a
// carried surrogate is pending and no input was supplied, and
// NS_OK_UENC_MOREOUTPUT when aDest has fewer than 4 bytes left for the next
// code point.
static nsresult ConvertCommon(const PRUnichar * aSrc,
                              PRInt32 * aSrcLength,
                              char * aDest,
                              PRInt32 * aDestLength,
                              PRUnichar * aHighSurrogate,
                              PRBool aIsLE)
{
  const PRUnichar * src = aSrc;
  const PRUnichar * srcEnd = aSrc + *aSrcLength;
  char * dest = aDest;
  const char * destEnd = aDest + *aDestLength;
  PRUint32 ucs4;


  // left-over high surrogate code unit from the previous run.
  if (*aHighSurrogate)
  {
    if (! *aSrcLength)
    {
      *aDestLength = 0;
      return NS_OK_UENC_MOREINPUT;
    }
    if (*aDestLength < 4)
    {
      *aSrcLength = 0;
      *aDestLength = 0;
      return NS_OK_UENC_MOREOUTPUT;
    }
    if ((*src & 0xfc00) != 0xdc00)
    {
      // Not a low surrogate: the carried high surrogate is unpaired. Emit
      // it alone and do NOT consume *src -- it is an independent code unit
      // that the main loop below must still convert.
      ucs4 = PRUint32(*aHighSurrogate);
    }
    else
    {
      // The carried high surrogate and *src form a pair; *src is used up.
      ucs4 = (((*aHighSurrogate & 0x3ffL) << 10) | (*src & 0x3ffL)) + 0x10000;
      ++src;
    }

    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    *aHighSurrogate = 0;
  }

  while (src < srcEnd) {
    // regular code point or an unpaired low surrogate
    if ((src[0] & 0xfc00) != 0xd800)
    {
      if (destEnd - dest < 4)
        goto error_more_output;
      ucs4 = PRUint32(src[0]);
    }
    else  // high surrogate
    {
      if ((src+1) >= srcEnd) {
        // The input ends on a high surrogate: remember it so the next call
        // (or Finish) can complete or flush it.
        *aHighSurrogate = src[0];
        *aDestLength = dest - aDest;
        return NS_OK_UENC_MOREINPUT;
      }
      // handle surrogate
      if (destEnd - dest < 4)
        goto error_more_output;
      if ((src[1] & 0xfc00) != 0xdc00)  // unpaired
        ucs4 = PRUint32(src[0]);
      else
      {  // convert surrogate pair to UCS4
        ucs4 = (((src[0] & 0x3ffL) << 10) | (src[1] & 0x3ffL)) + 0x10000;
        ++src;  // skip the low surrogate as well
      }
    }
    if (aIsLE)
      UCS4_TO_LE_STRING(ucs4, dest);
    else
      UCS4_TO_BE_STRING(ucs4, dest);
    dest += 4;
    ++src;
  }

  *aDestLength = dest - aDest;
  return NS_OK;

error_more_output:
  // Report how far we got so the caller can resume from aSrc + *aSrcLength.
  *aSrcLength = src - aSrc;
  *aDestLength = dest - aDest;
  return NS_OK_UENC_MOREOUTPUT;

}

// Flushes a high surrogate left pending in *aHighSurrogate at end of input.
//
//   aDest           output buffer.
//   aDestLength     in: capacity of aDest in bytes.
//                   out: bytes written (0 or 4).
//   aHighSurrogate  carry state; reset to 0 once it has been written.
//   aIsLE           nonzero selects UCS4_TO_LE_STRING, zero selects
//                   UCS4_TO_BE_STRING.
//
// Returns NS_OK_UENC_MOREOUTPUT (leaving the carry state in place) when a
// surrogate is pending but aDest holds fewer than 4 bytes; NS_OK otherwise.
static nsresult FinishCommon(char * aDest,
                             PRInt32 * aDestLength,
                             PRUnichar * aHighSurrogate,
                             PRBool aIsLE)
{
  // Nothing carried over from the last Convert call: nothing to emit.
  if (!*aHighSurrogate) {
    *aDestLength = 0;
    return NS_OK;
  }

  // A pending surrogate needs one full 4-byte slot.
  if (*aDestLength < 4) {
    *aDestLength = 0;
    return NS_OK_UENC_MOREOUTPUT;
  }

  // The lone high surrogate is written out unchanged as a 32-bit value.
  PRUint32 pending = PRUint32(*aHighSurrogate);
  if (aIsLE) {
    UCS4_TO_LE_STRING(pending, aDest);
  } else {
    UCS4_TO_BE_STRING(pending, aDest);
  }

  *aDestLength = 4;
  *aHighSurrogate = 0;
  return NS_OK;
}


//----------------------------------------------------------------------
// Class nsUnicodeToUTF32 [implementation]

// Expands NS_IMPL_ISUPPORTS1 for nsUnicodeToUTF32 with nsIUnicodeEncoder as
// its single interface. The macro is defined outside this file; presumably
// it supplies the class's AddRef/Release/QueryInterface boilerplate.
NS_IMPL_ISUPPORTS1(nsUnicodeToUTF32, nsIUnicodeEncoder)


//----------------------------------------------------------------------
// Subclassing of nsIUnicodeEncoder class [implementation]

// Reports the output size to budget for converting aSrcLength code units:
// four bytes per input code unit. aSrc itself is not inspected.
NS_IMETHODIMP nsUnicodeToUTF32::GetMaxLength(const PRUnichar * aSrc,
                                            PRInt32 aSrcLength,
                                            PRInt32 * aDestLength)
{
  const PRInt32 bytesPerCodeUnit = 4;
  *aDestLength = aSrcLength * bytesPerCodeUnit;
  return NS_OK;
}

// Sets every bit in the first 0x10000 / 8 bytes of aInfo. Presumably aInfo
// is a one-bit-per-code-point coverage map for U+0000..U+FFFF, in which
// case this marks all of them as representable -- confirm against the
// interface definition. The caller must supply at least that many bytes.
NS_IMETHODIMP nsUnicodeToUTF32::FillInfo(PRUint32 *aInfo)
{
  const size_t infoBytes = 0x10000L >> 3;
  memset(aInfo, 0xFF, infoBytes);
  return NS_OK;
}


//----------------------------------------------------------------------
// Class nsUnicodeToUTF32BE [implementation]

//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]


// Big-endian flavour of Convert: hands the buffers and this encoder's
// pending-surrogate state to ConvertCommon with the little-endian flag off.
NS_IMETHODIMP nsUnicodeToUTF32BE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool littleEndian = PR_FALSE;
  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                              &mHighSurrogate, littleEndian);
  return rv;
}

// Big-endian flavour of Finish: lets FinishCommon flush any pending high
// surrogate held in mHighSurrogate, with the little-endian flag off.
NS_IMETHODIMP nsUnicodeToUTF32BE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool littleEndian = PR_FALSE;
  nsresult rv = FinishCommon(aDest, aDestLength, &mHighSurrogate,
                             littleEndian);
  return rv;
}


//----------------------------------------------------------------------
// Class nsUnicodeToUTF32LE [implementation]

//----------------------------------------------------------------------
// Subclassing of nsUnicodeToUTF32 class [implementation]


// Little-endian flavour of Convert: hands the buffers and this encoder's
// pending-surrogate state to ConvertCommon with the little-endian flag on.
NS_IMETHODIMP nsUnicodeToUTF32LE::Convert(const PRUnichar * aSrc,
                                          PRInt32 * aSrcLength,
                                          char * aDest,
                                          PRInt32 * aDestLength)
{
  const PRBool littleEndian = PR_TRUE;
  nsresult rv = ConvertCommon(aSrc, aSrcLength, aDest, aDestLength,
                              &mHighSurrogate, littleEndian);
  return rv;
}

// Little-endian flavour of Finish: lets FinishCommon flush any pending high
// surrogate held in mHighSurrogate, with the little-endian flag on.
NS_IMETHODIMP nsUnicodeToUTF32LE::Finish(char * aDest,
                                         PRInt32 * aDestLength)
{
  const PRBool littleEndian = PR_TRUE;
  nsresult rv = FinishCommon(aDest, aDestLength, &mHighSurrogate,
                             littleEndian);
  return rv;
}