pjs/intl/uconv/ucvko/nsUnicodeToJamoTTF.cpp

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:expandtab:shiftwidth=2:tabstop=2:
 */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Communicator client code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corp.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Jungshik Shin <jshin@mailaps.org>
 *   Frank Tang <ftang@netscape.com>
 *   Jin-Hwan Cho <chofchof@ktug.or.kr>
 *   Won-Kyu Park  <wkpark@chem.skku.ac.kr>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/*
 * - Purposes:
 *    1. Enable rendering  over 1.5 million Hangul syllables with
 *       UnBatang and other fonts made available by UN KoaungHi
 *       and PARK Won-kyu.
 */
#include "nsUCvKODll.h"
#include "nsUnicodeToJamoTTF.h"
#include "prmem.h"
#include "nsXPIDLString.h"
#include "prtypes.h"
#include "nscore.h"
#include "nsISupportsUtils.h"
#include "nsCOMPtr.h"
#include "nsIUnicodeDecoder.h"
#include "nsServiceManagerUtils.h"
#include "nsICharsetConverterManager.h"
#include "nsICharRepresentable.h"
#include <string.h>

// One entry of a Jamo cluster lookup table (the tables themselves are
// pulled in from jamoclusters.h below).
typedef struct {
  // Lookup key: two or three jamo values, each stored as (code point - block
  // base) so that it fits in a byte. seq[2] == 0 marks a two-element key.
  PRUint8 seq[3];
  // Replacement value, also relative to the block base; the base is added
  // back when the matching sequence is replaced.
  PRUint8 liga;
} JamoNormMap;

// cluster maps
#include "jamoclusters.h"

// Constants for Hangul Jamo/syllable handling taken from Unicode 3.0
// section 3.11

#define LBASE 0x1100   // first leading consonant (choseong)
#define VBASE 0x1161   // first vowel (jungseong)
#define TBASE 0x11A7   // one before the first trailing consonant
#define TSTART 0x11A8  // first trailing consonant (jongseong)
#define SBASE 0xAC00   // first precomposed syllable

#define LCOUNT 19
#define VCOUNT 21
#define TCOUNT 28      // 27 trailing consonants + 1 for "no trailing consonant"
#define SCOUNT (LCOUNT * VCOUNT * TCOUNT)
#define SEND (SBASE + SCOUNT - 1)


#define LFILL 0x115F   // leading consonant filler
#define VFILL 0x1160   // vowel filler

// Jamo class tests over the whole U+1100 block. Note that IS_LC includes
// LFILL and IS_VO includes VFILL.
#define IS_LC(wc) (LBASE <= (wc) && (wc) <  VFILL)
#define IS_VO(wc) (VFILL <= (wc) && (wc) <  TSTART)
#define IS_TC(wc) (TSTART <= (wc) && (wc) <= 0x11FF)
#define IS_JAMO(wc)   (IS_LC(wc) || IS_VO(wc) || IS_TC(wc))

// Jamos used in modern precomposed syllables
#define IS_SYL_LC(wc) (LBASE <= (wc) && (wc) <  LBASE + LCOUNT)
#define IS_SYL_VO(wc) (VBASE <= (wc) && (wc) <  VBASE + VCOUNT)
// TCOUNT counts the "no trailing consonant" slot, so the valid trailing
// consonants are TBASE + 1 .. TBASE + TCOUNT - 1 (U+11A8 .. U+11C2).
// The upper bound must be exclusive: TBASE + TCOUNT (U+11C3) would make
// SYL_FROM_LVT spill over into the next LV syllable.
#define IS_SYL_TC(wc) (TBASE <  (wc) && (wc) <  TBASE + TCOUNT)

// Modern precomposed syllables.
#define IS_SYL(wc)   (SBASE <= (wc) && (wc) <= SEND)
#define IS_SYL_WO_TC(wc)  (((wc) - SBASE) % TCOUNT == 0)
#define IS_SYL_WITH_TC(wc)  (((wc) - SBASE) % TCOUNT)

// Compose precomposed syllables out of L, V, and T.
// Pass TBASE as 't' to compose an LV syllable without a trailing consonant.
#define SYL_FROM_LVT(l,v,t) (SBASE + \
                             (((l) - LBASE) * VCOUNT + (v) - VBASE) * TCOUNT + \
                             (t) - TBASE)

// Hangul tone marks
#define HTONE1 0x302E
#define HTONE2 0x302F

#define IS_TONE(wc) ((wc) == HTONE1 || (wc) == HTONE2)

// Below are constants for rendering with UnBatang-like fonts.

// Jamos are temporarily shifted into three disjoint 256-code-point blocks so
// that L, V and T can be told apart by their high byte alone.
#define LC_TMPPOS  0xF000 // temp. block for leading consonants
#define VO_TMPPOS  0xF100 // temp. block for vowels
#define TC_TMPPOS  0xF200 // temp. block for trailing consonants
// Amount added to a U+1100-block jamo to move it into its temporary block.
#define LC_OFFSET  (LC_TMPPOS-LBASE)
#define VO_OFFSET  (VO_TMPPOS-VFILL)
#define TC_OFFSET  (TC_TMPPOS-TSTART)

// Jamo class of *temporary* code points in PUA for UnBatang-like fonts.
#define IS_LC_EXT(wc) ( ((wc) & 0xFF00) == LC_TMPPOS )
#define IS_VO_EXT(wc) ( ((wc) & 0xFF00) == VO_TMPPOS )
#define IS_TC_EXT(wc) ( ((wc) & 0xFF00) == TC_TMPPOS )

// Glyph code point bases for L, V, and T in UnBatang-like fonts
#define UP_LBASE 0xE000  // 0xE000 = Lfill, 0xE006 = Kiyeok
#define UP_VBASE 0xE300  // 0xE300 = Vfill, 0xE302 = Ah
#define UP_TBASE 0xE404  // 0xE400 = Tfill, 0xE404 = Kiyeok

// EUC-KR decoder for FillInfo. Created on first use by GetDecoder() and then
// kept for reuse by later calls.
static nsCOMPtr<nsIUnicodeDecoder> gDecoder = 0;

// Forward declarations of the file-local helpers defined further down.
static inline void FillInfoRange     (PRUint32* aInfo, PRUint32 aStart,
                                      PRUint32 aEnd);
static nsresult     JamoNormalize    (const PRUnichar* aInSeq,
                                      PRUnichar** aOutSeq, PRInt32* aLength);
static void         JamosToExtJamos  (PRUnichar* aInSeq,  PRInt32* aLength);
static const JamoNormMap* JamoClusterSearch(JamoNormMap aKey,
                                            const JamoNormMap* aClusters,
                                            PRInt16 aClustersSize);
static nsresult     FillInfoEUCKR    (PRUint32 *aInfo, PRUint16 aHigh1,
                                      PRUint16 aHigh2);

static PRInt32      JamoNormMapComp  (const JamoNormMap& p1,
                                      const JamoNormMap& p2);
static PRInt16      JamoSrchReplace  (const JamoNormMap* aCluster,
                                      PRUint16 aSize, PRUnichar *aIn,
                                      PRInt32* aLength, PRUint16 aOffset);
static nsresult     GetDecoder       (nsIUnicodeDecoder** aDecoder);
static nsresult     ScanDecomposeSyllable (PRUnichar *aIn, PRInt32* aLength,
                                           const PRInt32 aMaxLen);

//----------------------------------------------------------------------
// Class nsUnicodeToJamoTTF [implementation]

// The class exposes two interfaces: nsIUnicodeEncoder and
// nsICharRepresentable.
NS_IMPL_ISUPPORTS2(nsUnicodeToJamoTTF, nsIUnicodeEncoder, nsICharRepresentable)

// Record how unconvertible input should be handled. The values are only
// stored here; a callback policy is rejected when no encoder accompanies it.
NS_IMETHODIMP
nsUnicodeToJamoTTF::SetOutputErrorBehavior(PRInt32 aBehavior,
                                           nsIUnicharEncoder *aEncoder,
                                           PRUnichar aChar)
{
  if (!aEncoder && kOnError_CallBack == aBehavior)
  {
    return NS_ERROR_NULL_POINTER;
  }

  mErrBehavior = aBehavior;
  mErrEncoder = aEncoder;
  mErrChar = aChar;

  return NS_OK;
}

// constructor and destructor

nsUnicodeToJamoTTF::nsUnicodeToJamoTTF()
{
  // Reset() inspects mJamos to decide whether to free it, so it has to hold
  // a defined value before the call.
  mJamos = nsnull;

  Reset();
}

nsUnicodeToJamoTTF::~nsUnicodeToJamoTTF()
{
  // Only a heap buffer is released; the built-in static array is part of
  // the object itself.
  const PRBool onHeap = (mJamos != nsnull) && (mJamos != mJamosStatic);
  if (onHeap)
  {
    PR_Free(mJamos);
  }
}

// Character classes used for syllable boundary detection. The enumerator
// values are used directly as row/column indices of gIsBoundary, so their
// order must stay in sync with that table.
enum KoCharClass {
  KO_CHAR_CLASS_LC,     // leading consonant (including Lfill)
  KO_CHAR_CLASS_VO,     // vowel (including Vfill)
  KO_CHAR_CLASS_TC,     // trailing consonant
  KO_CHAR_CLASS_SYL1,   // modern precomposed syllable w/o TC (LV type syl.)
  KO_CHAR_CLASS_SYL2,   // modern precomposed syllable with TC (LVT type syl.)
  KO_CHAR_CLASS_TONE,   // Tone marks
  KO_CHAR_CLASS_NOHANGUL, // Non-Hangul characters.
  KO_CHAR_CLASS_NUM     // number of classes (table dimension)
} ;

// Map a UTF-16 code unit to its KoCharClass. The tests are tried in order:
// jamo classes first, then precomposed syllables (split by whether they
// carry a trailing consonant), then tone marks; everything else is
// KO_CHAR_CLASS_NOHANGUL. The argument is evaluated more than once.
#define CHAR_CLASS(ch) \
  (IS_LC(ch) ? KO_CHAR_CLASS_LC   :  \
   IS_VO(ch) ? KO_CHAR_CLASS_VO   :  \
   IS_TC(ch) ? KO_CHAR_CLASS_TC   :  \
   IS_SYL(ch) ?                      \
    (IS_SYL_WITH_TC(ch) ? KO_CHAR_CLASS_SYL2 : KO_CHAR_CLASS_SYL1) : \
   IS_TONE(ch) ? KO_CHAR_CLASS_TONE : \
   KO_CHAR_CLASS_NOHANGUL)


// Grapheme boundary checker : See UTR #29 and Unicode 3.2 section 3.11
// Indexed as gIsBoundary[class of previous char][class of current char];
// a non-zero entry means a cluster boundary lies between the two.
// The M (tone mark) column is all zero: a tone mark never starts a new
// cluster here, and Convert() handles tone-after-tone separately.
const static PRBool gIsBoundary[KO_CHAR_CLASS_NUM][KO_CHAR_CLASS_NUM] =
{// L  V  T  S1 S2 M  X
  { 0, 0, 1, 0, 0, 0, 1 }, // L
  { 1, 0, 0, 1, 1, 0, 1 }, // V
  { 1, 1, 0, 1, 1, 0, 1 }, // T
  { 1, 0, 0, 1, 1, 0, 1 }, // S1
  { 1, 1, 0, 1, 1, 0, 1 }, // S2
  { 1, 1, 1, 1, 1, 0, 1 }, // M
  { 1, 1, 1, 1, 1, 0, 1 }  // X
};


/**
 * Convert a run of UTF-16 code units into glyph code points for
 * UnBatang-like fonts, written to aDest as two bytes each (high byte first).
 *
 * Input is accumulated into mJamos one cluster at a time; each time a
 * cluster boundary is found the pending cluster is rendered by
 * composeHangul(). Any cluster still pending at the end of the input is
 * rendered before returning.
 *
 * @param aSrc        input code units
 * @param aSrcLength  [in] number of code units in aSrc (not modified)
 * @param aDest       output buffer; its capacity is NOT checked here, so the
 *                    caller must size it (see GetMaxLength)
 * @param aDestLength [out] number of bytes written to aDest
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if the cluster buffer could not be
 *         grown. On that failure the existing buffer and its bookkeeping are
 *         left intact.
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::Convert(const PRUnichar * aSrc,
                            PRInt32 * aSrcLength, char * aDest,
                            PRInt32 * aDestLength)
{
  nsresult rv = NS_OK;
  mByteOff = 0;

  // This should never happen, but it happens under MS Windows, somehow...
  if (mJamoCount > mJamosMaxLength)
  {
    NS_WARNING("mJamoCount > mJamoMaxLength on entering Convert()");
    Reset();
  }

  for (PRInt32 charOff = 0; charOff < *aSrcLength; charOff++)
  {
    PRUnichar ch = aSrc[charOff];

    // Syllable boundary check. Ref. : Unicode 3.2 section 3.11
    if (mJamoCount != 0 &&
        gIsBoundary[CHAR_CLASS(mJamos[mJamoCount - 1])][CHAR_CLASS(ch)])
    {
      composeHangul(aDest);
      mJamoCount = 0;
    }
    // Ignore tone marks other than the first in a sequence of tone marks.
    else if (mJamoCount != 0 && IS_TONE(mJamos[mJamoCount - 1]) && IS_TONE(ch))
    {
      // NOTE(review): the decrement below removes the pending (first) tone
      // mark before composing, so the whole run of tone marks is discarded
      // rather than "all but the first". Kept as is; confirm the intent.
      --mJamoCount;
      composeHangul(aDest);
      mJamoCount = 0;

      // skip over tone marks from the second on in a series.
      while (IS_TONE(ch) && ++charOff < *aSrcLength)
        ch = aSrc[charOff];

      if (!IS_TONE(ch))
      {
        mJamos[mJamoCount++] = ch;
        continue;
      }
      else
        break;
    }

    if (mJamoCount == mJamosMaxLength)
    {
      // Grow the cluster buffer by one slot. The new pointer and the new
      // capacity are committed only after the allocation succeeded, so a
      // failure neither leaks the old block nor leaves mJamos null with a
      // non-zero mJamoCount.
      PRUnichar *grown;
      if (mJamos == mJamosStatic)
      {
        grown = (PRUnichar *) PR_Malloc(sizeof(PRUnichar) *
                                        (mJamosMaxLength + 1));
        if (grown)
          memcpy(grown, mJamosStatic, sizeof(PRUnichar) * mJamoCount);
      }
      else
      {
        grown = (PRUnichar *) PR_Realloc(mJamos,
                               sizeof(PRUnichar) * (mJamosMaxLength + 1));
      }
      if (!grown)
        return  NS_ERROR_OUT_OF_MEMORY;

      mJamos = grown;
      mJamosMaxLength++;
    }

    mJamos[mJamoCount++] = ch;
  }

  // Flush whatever cluster is still pending.
  if (mJamoCount != 0)
    composeHangul(aDest);
  mJamoCount = 0;
  *aDestLength = mByteOff;

  return rv;
}

// Render any cluster still held in mJamos into aDest, report the number of
// bytes produced through aDestLength, and clear the pending state.
NS_IMETHODIMP
nsUnicodeToJamoTTF::Finish(char* aDest, PRInt32* aDestLength)
{
  mByteOff = 0;

  if (mJamoCount)
  {
    composeHangul(aDest);
  }
  *aDestLength = mByteOff;

  // Leave nothing behind for the next conversion.
  mJamoCount = 0;
  mByteOff = 0;

  return NS_OK;
}

//================================================================
// Return to the initial state: release a heap-allocated cluster buffer if
// there is one, switch back to the zeroed built-in array and clear all
// counters.
NS_IMETHODIMP
nsUnicodeToJamoTTF::Reset()
{
  const PRBool onHeap = (mJamos != nsnull) && (mJamos != mJamosStatic);
  if (onHeap)
  {
    PR_Free(mJamos);
  }

  memset(mJamosStatic, 0, sizeof(mJamosStatic));
  mJamos = mJamosStatic;
  mJamosMaxLength = sizeof(mJamosStatic) / sizeof(PRUnichar);

  mByteOff = 0;
  mJamoCount = 0;

  return NS_OK;
}

// Report an upper bound for the output size of Convert(): six bytes per
// input code unit. aSrc itself is not inspected.
NS_IMETHODIMP
nsUnicodeToJamoTTF::GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
                                 PRInt32 * aDestLength)
{
  // A precomposed Hangul syllable can be decomposed into 3 Jamos, and each
  // of them takes 2 bytes.
  const PRInt32 bytesPerInputChar = 6;

  *aDestLength = aSrcLength * bytesPerInputChar;
  return NS_OK;
}


// Mark in aInfo every code point this converter can represent:
// precomposed Hangul syllables, conjoining jamos, the two Hangul tone marks,
// printable US-ASCII, and the KS X 1001 symbols and Hanja (found by decoding
// EUC-KR rows).
NS_IMETHODIMP
nsUnicodeToJamoTTF::FillInfo(PRUint32* aInfo)
{
  FillInfoRange(aInfo, SBASE, SEND);

  // Inclusive [first, last] ranges that are set one code point at a time.
  static const PRUnichar kRanges[][2] = {
    { 0x1100, 0x1159 },   // Hangul Conjoining Jamos: leading consonants
    { 0x115f, 0x115f },   // Lfill
    { 0x1160, 0x11a2 },   // Vfill and vowels
    { 0x11a8, 0x11f9 },   // trailing consonants
    { HTONE1, HTONE1 },   // Hangul Tone marks
    { HTONE2, HTONE2 },
    { 0x20,   0x7e   }    // UnPark fonts have US-ASCII chars.
  };
  const PRUint32 rangeCount = sizeof(kRanges) / sizeof(kRanges[0]);

  for (PRUint32 r = 0; r < rangeCount; r++)
  {
    for (PRUnichar ch = kRanges[r][0]; ch <= kRanges[r][1]; ch++)
      SET_REPRESENTABLE(aInfo, ch);
  }

  // UnPark fonts have Hanjas and symbols defined in KS X 1001 as well.

  // XXX: Do we need to exclude Cyrillic, Greek letters and some Latin letters
  // included in KS X 1001 as 'symbol characters'?
  // KS X 1001 has only a subset of Greek and Cyrillic alphabets and
  // Latin letters with diacritic marks so that including them may
  // result in ransom-note like effect if it is listed *before*
  // any genuine Greek/Russian/Latin fonts in CSS.

  // Lead byte range for symbol chars. in EUC-KR : 0xA1 - 0xAF
  nsresult rv = FillInfoEUCKR(aInfo, 0xA1, 0xAF);
  NS_ENSURE_SUCCESS(rv, rv);

  // Lead byte range for Hanja in EUC-KR : 0xCA - 0xFD.
  rv = FillInfoEUCKR(aInfo, 0xCA, 0xFD);
  return rv;
}

/**
 * Copied from mslvt.otp by Jin-Hwan Cho <chofchof@ktug.or.kr>.
 * Extended by Jungshik Shin <jshin@mailaps.org> to support
 * additional Jamo clusters not encoded in U+1100 Jamo block
 * as precomposed Jamo clusters.
 * Corrected by Won-Kyu Park <wkpark@chem.skku.ac.kr>.
 * See http://www.ktug.or.kr for its use in Lambda and swindow/SFontTTF.cpp at
 * http://www.yudit.org for its use in Yudit.
 * A patch with the same set of tables was submitted for
 * inclusion in Pango (http://www.pango.org).
 */

/**
 * Mapping from LC code points  to glyph indices in UnPark fonts.
 * UnPark fonts have the same glyph arrangement as Ogulim font, but
 * they have them in BMP PUA (beginning at U+E000) to be proper Unicode
 * fonts unlike Ogulim font with Jamo glyphs in CJK ideograph code points.
 * Glyph indices for 90 LCs encoded in U+1100 block are followed by  6 reserved
 * code points  and  glyph indices for 34 additional consonant  clusters
 * (not assigned code points of their own)  for which separate glyphs exist in
 * UnPark fonts.
 * The first element is for Kiyeok and UP_LBASE is set to Lfill glyph(0xe000)
 * so that the first element is '1' to map it to glyph for Kiyeok at 0xe006.
 * (there are six glyphs for each LC in UnPark fonts.)
 *
 * The table is indexed by (code point - LBASE); the entry is multiplied by 6
 * and added to UP_LBASE, plus a shape number taken from gUnParkVo2LcMap or
 * gUnParkVo2LcMap2. The six reserved entries are 0, i.e. they select the
 * Lfill glyph; the last of them (index 0x5F) is LFILL itself.
 */
const static PRUint8 gUnParkLcGlyphMap[130] = {
  1,  2,  4, 12, 14, 20, 36, 42, 46, 62, 70, 85,100,102,108,113,
114,116,120,  5,  6,  7,  8, 13, 23, 26, 34, 35, 39, 41, 43, 44,
 45, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 65,
 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
 84, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99,101,104,105,
106,107,109,110,111,112,117,119,122,123,  0,  0,  0,  0,  0,  0,
  3,  9, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29,
 30, 31, 32, 33, 37, 38, 40, 53, 56, 59, 71, 88, 98,103,115,118,
121, 124
};

/**
 * Mapping from vowel code points  to glyph indices in UnPark/Oxxx font.
 * Glyphs for 28 additional vowel clusters (not given separate
 * code points in U+1100 block) are available in O*ttf fonts.
 * Total count: 95 = 1(Vfill) + 66 (in U+1100 block) + 28 (extra.)
 *
 * The table is indexed by (code point - VFILL); the entry is multiplied by 2
 * and added to UP_VBASE (plus 1 when the syllable has a trailing consonant).
 */
const static PRUint8 gUnParkVoGlyphMap[95] = {
   0,  1,  5,  6, 10, 11, 15, 16, 20, 21, 22, 23, 33, 34, 43, 46,
  48, 52, 54, 64, 71, 73,  2,  3,  7,  8, 12, 13, 14, 18, 19, 26,
  27, 29, 30, 32, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51, 55, 57,
  58, 59, 60, 62, 63, 69, 70, 72, 74, 75, 80, 83, 85, 87, 88, 90,
  92, 93, 94,  4,  9, 17, 24, 25, 28, 31, 35, 36, 39, 49, 53, 56,
  61, 65, 66, 67, 68, 76, 77, 78, 79, 81, 82, 84, 86, 89, 91
};

/**
 * Mapping from TC code points  to glyph indices in UnPark/Oxxx font.
 * glyphs for 59 additional trailing consonant clusters (not given separate
 * code points in U+1100 blocks) are available in O*ttf fonts.
 * Total count: 141 = 82 (in U+1100 block) + 59 (extra.)
 * The first element is Kiyeok and UP_TBASE is set to 0xE404 (Kiyeok).
 *
 * The table is indexed by (code point - TSTART); the entry is multiplied by 4
 * and added to UP_TBASE, plus a shape number taken from gUnParkVo2TcMap.
 */
const static PRUint8 gUnParkTcGlyphMap[141] = {
   0,  1,  5, 10, 17, 20, 21, 32, 33, 42, 46, 52, 57, 58, 59, 63,
  78, 84, 91, 98,109,123,127,128,129,130,135,  3,  6, 11, 13, 15,
  16, 19, 22, 25, 35, 37, 38, 39, 40, 43, 44, 48, 50, 51, 53, 54,
  56, 60, 64, 67, 69, 71, 72, 73, 75, 76, 77, 80, 88, 89, 90, 92,
  93, 94, 96,106,110,111,114,115,117,119,120,131,134,136,137,138,
 139,140,  2,  4,  7,  8,  9, 12, 14, 18, 23, 24, 26, 27, 28, 29,
  30, 31, 34, 36, 41, 45, 47, 49, 55, 61, 62, 65, 66, 68, 70, 74,
  79, 81, 82, 83, 85, 86, 87, 95, 97, 99,100,101,102,103,104,105,
 107,108,112,113,116,118,121,122,124,125,126,132,133
};

/* Which of six glyphs to use for choseong(L) depends on
   the following vowel and whether or not jongseong(T) is present
   in a syllable. Note that the first(0th) element is for Vfill.

   Shape number of choseong(L) w.r.t. jungseong(V) without jongseong(T).
   Indexed by (vowel code point - VFILL); values are 0..2.

   95 = 1(Vfill) + 66 + 28 (extra)
*/

const static PRUint8 gUnParkVo2LcMap[95] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1,
  1, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1,
  1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 1, 0, 2, 1,
  2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
  2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2
};

/* Shape number of choseong(L) w.r.t. jungseong(V) with jongseong(T).
   Indexed by (vowel code point - VFILL); values are 3..5, i.e. the second
   half of the six glyphs available for each choseong. */

const static PRUint8 gUnParkVo2LcMap2[95] = {
  3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 5, 4,
  4, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 5, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4,
  4, 4, 4, 5, 4, 5, 5, 4, 3, 3, 4, 4, 4, 3, 5, 4,
  5, 4, 5, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4,
  5, 4, 4, 4, 5, 4, 3, 3, 3, 4, 4, 4, 3, 5, 5
};

/* Shape number of jongseong(T) w.r.t. jungseong(V).
   Which of four glyphs to use for jongseong(T) depends on
   the preceding vowel. Indexed by (vowel code point - VFILL);
   values are 0..3. */

const static PRUint8 gUnParkVo2TcMap[95] = {
  3, 0, 2, 0, 2, 1, 2, 1, 2, 3, 0, 2, 1, 3, 3, 1,
  2, 1, 3, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
  2, 2, 3, 3, 0, 2, 1, 3, 1, 0, 2, 1, 2, 3, 0, 1,
  2, 1, 2, 3, 1, 3, 3, 1, 2, 2, 1, 1, 1, 1, 3, 1,
  3, 1, 3, 0, 1, 0, 0, 0, 2, 3, 0, 2, 1, 1, 2, 2,
  3, 0, 0, 0, 3, 0, 2, 2, 2, 1, 0, 1, 2, 1, 1
};

/**
 * Render the cluster currently held in mJamos[0 .. mJamoCount) and append
 * the result to aResult at mByteOff, two bytes per code point with the high
 * byte first.
 *
 * Order of processing:
 *  1. a trailing tone mark is emitted first;
 *  2. a lone precomposed syllable or a non-Hangul character is emitted as is;
 *  3. otherwise the cluster is normalized (JamoNormalize) and, if it is a
 *     modern LV/LVT sequence, emitted as a precomposed syllable;
 *  4. otherwise it is mapped to extended jamos and, if the result is in LV
 *     or LVT form, emitted as UnBatang-style glyph code points;
 *  5. otherwise each jamo is emitted on its own (fallback).
 *
 * @param aResult output buffer; its capacity is not checked here
 * @return NS_OK on success, NS_ERROR_UNEXPECTED for an empty cluster,
 *         NS_ERROR_NULL_POINTER for a null aResult, or the failure code of
 *         JamoNormalize.
 */
NS_IMETHODIMP
nsUnicodeToJamoTTF::composeHangul(char* aResult)
{
  PRInt32 length = mJamoCount, i;
  nsresult rv = NS_OK;

  if (!length)
  {
    NS_WARNING("composeHangul() : zero length string comes in ! \n");
    return NS_ERROR_UNEXPECTED;
  }

  if (!aResult)
    return NS_ERROR_NULL_POINTER;

  // Put Hangul tone mark first as it should be to the left of
  // the character it follows.
  // XXX : What should we do when a tone mark come by itself?

  if (IS_TONE(mJamos[length - 1]))
  {
    aResult[mByteOff++] = PRUint8(mJamos[length - 1] >> 8);
    aResult[mByteOff++] = PRUint8(mJamos[length - 1] & 0xff);
    if (--length == 0)
      return rv;
  }

  // no more processing is necessary for precomposed modern Hangul syllables.
  if (length == 1 && IS_SYL(mJamos[0]))
  {
    aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
    aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff);
    return rv;
  }

  if (CHAR_CLASS(mJamos[0]) == KO_CHAR_CLASS_NOHANGUL)
  {
    NS_ASSERTION(length == 1, "A non-Hangul should come by itself !!\n");
    aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
    aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff);
    return rv;
  }

  nsXPIDLString buffer;

  rv =  JamoNormalize(mJamos, getter_Copies(buffer), &length);
  // Bail out before touching the buffer: on failure it may hold nothing
  // usable.
  NS_ENSURE_SUCCESS(rv, rv);

  // safe to cast away const.
  PRUnichar* text = buffer.BeginWriting();

  text += RenderAsPrecompSyllable(text, &length, aResult);

  if (!length)
    return rv;

  // convert to extended Jamo sequence
  JamosToExtJamos(text, &length);


  // Check if not in LV or LVT form after the conversion
  if ((length != 2 && length != 3) ||
      (!IS_LC_EXT(text[0]) || !IS_VO_EXT(text[1]) ||
       (length == 3 && !IS_TC_EXT(text[2]))))
    goto fallback;

//  Now that text[0..2] are identified as L,V, and T, it's safe to
//  shift them back to U+1100 block although their ranges overlap each other.

  text[0] -= LC_OFFSET;
  text[1] -= VO_OFFSET;
  if (length == 3)
    text[2] -= TC_OFFSET;

  // NOTE(review): the table indices below can be as large as 255 for
  // extended clusters; this relies on the cluster tables in jamoclusters.h
  // never producing a value beyond the glyph map sizes.
  if (length != 3)
  {
    text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 +
              gUnParkVo2LcMap[text[1] - VFILL] + UP_LBASE;
    text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE;
  }
  else
  {
    // text[2] must be computed before text[1] is overwritten, because the
    // T shape depends on the original vowel.
    text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 +
              gUnParkVo2LcMap2[text[1] - VFILL] + UP_LBASE;
    text[2] = gUnParkTcGlyphMap[text[2] - TSTART] * 4 +
              gUnParkVo2TcMap[text[1] - VFILL] + UP_TBASE;
    text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE + 1;
  }

  // Xft doesn't like blank glyphs at code points other than listed in
  // the blank glyph list. Replace Lfill glyph code points of UnPark
  // fonts with standard LFILL code point (U+115F).

  if (UP_LBASE <= text[0] && text[0] < UP_LBASE + 6)
    text[0] = LFILL;

  // The same is true of glyph code points corresponding to VFILL
  // in UnBatang-like fonts. VFILL is not only blank but also non-advancing
  // so that we can just skip it.
  if (UP_VBASE <= text[1] && text[1] < UP_VBASE + 2)
  {
    --length;
    if (length == 2)
      text[1] = text[2];
  }

  for (i = 0 ; i < length; i++)
  {
    aResult[mByteOff++] = PRUint8(text[i] >> 8);
    aResult[mByteOff++] = PRUint8(text[i] & 0xff);
  }

  return rv;


  /* If jamo sequence is not convertible to a jamo cluster,
   * just enumerate stand-alone jamos. Prepend V and T with  Lf.
   *
   * XXX: It might be better to search for a sub-sequence (not just at the
   * beginning of a cluster but also in the middle or at the end.)
   * that can be rendered as precomposed and render it as such and enumerate
   * jamos in the rest. This approach is useful when a simple Xkb-based input
   * is used.
   */

fallback:
  for (i = 0; i < length; i++)
  {
    PRUnichar wc=0, wc2=0;
    /* skip Lfill and Vfill if they're not the sole char. in a cluster */
    if (length > 1 &&
         (text[i] - LC_OFFSET == LFILL || text[i] - VO_OFFSET == VFILL))
      continue;
    else if (IS_LC_EXT (text[i]))
       wc = gUnParkLcGlyphMap[text[i] - LC_OFFSET - LBASE] * 6 + UP_LBASE;
    else
    {
  /* insert Lfill glyph to advance cursor pos. for V and T.
   * This has to be the filler (LFILL, U+115F), not LBASE: LBASE is the
   * code point of Kiyeok and would draw a real consonant in front of every
   * stand-alone vowel or trailing consonant. */
      wc = LFILL;
  /* don't have to draw Vfill. Drawing Lfill is sufficient. */
      if (text[i] - VO_OFFSET != VFILL)
        wc2 = IS_VO_EXT (text[i]) ?
        gUnParkVoGlyphMap[text[i] - VO_OFFSET - VFILL] * 2 + UP_VBASE:
        gUnParkTcGlyphMap[text[i] - TC_OFFSET - TSTART] * 4 + UP_TBASE + 3;
    }
    aResult[mByteOff++] = PRUint8(wc >> 8);
    aResult[mByteOff++] = PRUint8(wc & 0xff);

    if (wc2)
    {
      aResult[mByteOff++] = PRUint8(wc2 >> 8);
      aResult[mByteOff++] = PRUint8(wc2 & 0xff);
    }
  }

  return rv;
}

// If aSrc is exactly a modern LV (2 jamos) or LVT (3 jamos) sequence, emit
// the corresponding precomposed syllable to aResult (two bytes, high byte
// first) and consume the jamos.
// Returns the number of jamos consumed (0, 2 or 3) and subtracts that count
// from *aSrcLength.
int
nsUnicodeToJamoTTF::RenderAsPrecompSyllable (PRUnichar* aSrc,
                                             PRInt32* aSrcLength, char* aResult)
{
  const PRInt32 srcLen = *aSrcLength;
  int consumed = 0;

  // The length is tested first so that aSrc[1] / aSrc[2] are only looked at
  // when they are part of the sequence.
  if ((srcLen == 2 || srcLen == 3) &&
      IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]))
  {
    if (srcLen == 2)
      consumed = 2;
    else if (IS_SYL_TC(aSrc[2]))
      consumed = 3;
  }

  if (consumed != 0)
  {
    // TBASE stands for "no trailing consonant" in the composition formula.
    const PRUnichar trailing = (consumed == 3) ? aSrc[2] : PRUnichar(TBASE);
    const PRUnichar syllable = SYL_FROM_LVT(aSrc[0], aSrc[1], trailing);

    aResult[mByteOff++] = PRUint8(syllable >> 8);
    aResult[mByteOff++] = PRUint8(syllable & 0xff);
  }

  *aSrcLength -= consumed;
  return consumed;
}

// Fill up Cmap array quickly for a rather large range.
// Sets the bits for every code point in the inclusive range [aStart, aEnd].
// aInfo is a bitmap with 32 code points per word: code point c lives in
// word (c >> 5), bit (c & 0x1f).
// Nothing is done when aInfo is null or the range is empty (aStart > aEnd).
/* static */
static inline void FillInfoRange(PRUint32* aInfo, PRUint32 aStart, PRUint32 aEnd)
{
  if (!aInfo || aStart > aEnd)
    return;

  PRUint32 b = aStart >> 5;
  const PRUint32 e = aEnd >> 5;

  // Bits from aStart's position upwards in the first word, and bits up to
  // and including aEnd's position in the last word.
  const PRUint32 headMask = 0xFFFFFFFFU << (aStart & 0x1f);
  const PRUint32 tailMask = 0xFFFFFFFFU >> (31 - (aEnd & 0x1f));

  // First and last word coincide: only the bits between aStart and aEnd may
  // be set. Applying the two masks separately would set the whole word.
  if (b == e)
  {
    aInfo[b] |= headMask & tailMask;
    return;
  }

  aInfo[b++] |= headMask;

  for( ; b < e ; b++)
    aInfo[b] |= 0xFFFFFFFFU;

  aInfo[e] |= tailMask;
}


// Number of characters in one EUC-KR row (trail bytes 0xA1 .. 0xFE).
#define ROWLEN 94
// True when x is a valid EUC-KR lead/trail byte value (0xA1 .. 0xFE).
#define IS_GR94(x) (0xA0 < (x) && (x) < 0xFF)

// Given a range [aHigh1, aHigh2] in high bytes of EUC-KR, convert
// rows of 94 characters in the range (row by row) to Unicode and set
// representability if the result is not 0xFFFD (Unicode replacement char.).
//
// Returns NS_ERROR_NULL_POINTER for a null aInfo, NS_ERROR_INVALID_ARG when
// either bound is outside 0xA1 .. 0xFE, or the failure code of the decoder
// lookup / conversion.
/* static */
nsresult FillInfoEUCKR (PRUint32 *aInfo, PRUint16 aHigh1, PRUint16 aHigh2)
{
  char row[ROWLEN * 2];
  PRUnichar dest[ROWLEN];
  nsresult rv = NS_OK;

  NS_ENSURE_TRUE(aInfo, NS_ERROR_NULL_POINTER);
  NS_ENSURE_TRUE(IS_GR94(aHigh1) && IS_GR94(aHigh2), NS_ERROR_INVALID_ARG);

  nsCOMPtr<nsIUnicodeDecoder> decoder;
  rv = GetDecoder(getter_AddRefs(decoder));
  NS_ENSURE_SUCCESS(rv,rv);

  for (PRUint16 i = aHigh1 ; i <= aHigh2; i++)
  {
    PRUint16 j;
    // handle a row of 94 char. at a time.
    for (j = 0 ; j < ROWLEN; j++)
    {
      row[j * 2] = char(i);
      row[j * 2 + 1] = char(j + 0xa1);
    }
    PRInt32 srcLen = ROWLEN * 2;
    PRInt32 destLen = ROWLEN;
    rv = decoder->Convert(row, &srcLen, dest, &destLen);
    NS_ENSURE_SUCCESS(rv, rv);

    // set representability according to the conversion result.
    // Only the first destLen entries were written by the decoder; anything
    // beyond that is uninitialized or left over from the previous row.
    for (PRInt32 k = 0 ; k < destLen && k < ROWLEN; k++)
      if (dest[k] != 0xFFFD)
        SET_REPRESENTABLE(aInfo, dest[k]);
  }
  return rv;
}

// Hand out an AddRef'ed pointer to the shared EUC-KR decoder, creating it
// through the charset converter manager on first use.
/* static */
nsresult GetDecoder(nsIUnicodeDecoder** aDecoder)
{
  if (!gDecoder)
  {
    nsresult rv;
    nsCOMPtr<nsICharsetConverterManager> converterManager =
      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
    NS_ENSURE_SUCCESS(rv,rv);

    rv = converterManager->GetUnicodeDecoderRaw("EUC-KR",
                                                getter_AddRefs(gDecoder));
    NS_ENSURE_SUCCESS(rv,rv);
  }

  // The caller gets its own reference; gDecoder keeps one for later calls.
  *aDecoder = gDecoder.get();
  NS_ADDREF(*aDecoder);
  return NS_OK;
}


// Three-way comparison of two cluster keys, element by element.
// Returns a negative value, zero or a positive value when p1's seq sorts
// before, equal to or after p2's seq. The 'liga' member is not compared.
/* static */
PRInt32 JamoNormMapComp (const JamoNormMap& p1, const JamoNormMap& p2)
{
  for (int k = 0; k < 2; ++k)
  {
    const PRInt32 delta = p1.seq[k] - p2.seq[k];
    if (delta != 0)
      return delta;
  }
  return p1.seq[2] - p2.seq[2];
}

// Look up aKey in aClusters (aClustersSize entries, ordered according to
// JamoNormMapComp). The range is halved until fewer than nine entries
// remain; those are then scanned linearly.
// Returns a pointer to the matching entry, or nsnull if there is none or the
// arguments are invalid.
/* static */
const JamoNormMap* JamoClusterSearch (JamoNormMap aKey,
                                const JamoNormMap* aClusters,
                                PRInt16 aClustersSize)
{
  if (aClustersSize <= 0 || !aClusters)
  {
    NS_WARNING("aClustersSize <= 0 || !aClusters");
    return nsnull;
  }

  const JamoNormMap* base = aClusters;
  PRInt16 count = aClustersSize;

  // Bisection phase: each round keeps either the part below or the part
  // above the middle entry.
  while (count >= 9)
  {
    const PRUint16 last = count - 1;
    const PRUint16 mid = last / 2;
    const PRInt32 order = JamoNormMapComp (aKey, base[mid]);

    if (order == 0)
      return base + mid;

    if (order < 0)
    {
      count = mid;
    }
    else
    {
      base += mid + 1;
      count = last - mid;
    }
  }

  // Linear phase over the few remaining entries.
  for (PRInt16 k = 0; k < count; k++)
  {
    if (JamoNormMapComp (aKey, base[k]) == 0)
      return base + k;
  }
  return nsnull;
}


/*
 *  Scan 'aIn' for jamo sequences listed in the cluster table and replace
 *  each match, in place, with the single code point match->liga (+ aOffset).
 *
 *  Only the first run of code units whose high byte equals aOffset is
 *  examined. All keys of one table have the same length (2 or 3), which is
 *  read from the table's first entry.
 *
 *  Returns the change in *aLength caused by the replacements (zero or
 *  negative); *aLength itself is updated as well.
 *  XXX : 1. Do we need caching here?
 **/

/* static */
PRInt16 JamoSrchReplace (const JamoNormMap* aClusters,
                         PRUint16 aClustersSize, PRUnichar* aIn,
                         PRInt32* aLength, PRUint16 aOffset)
{
  const PRInt32 lengthBefore = *aLength;

  // non-zero third element => keys are 3 long. otherwise, they're 2 long.
  const PRUint16 keyLen = (aClusters[0].seq[2] != 0) ? 3 : 2;

  // Locate the run [first, limit) of values in [aOffset, aOffset + 0x100).
  PRInt32 first = 0;
  while (first < lengthBefore && (aIn[first] & 0xff00) != aOffset)
    first++;

  PRInt32 limit = first;
  while (limit < lengthBefore && (aIn[limit] & 0xff00) == aOffset)
    limit++;

  // The loop condition doubles as the range check for pos + 1 and pos + 2.
  PRInt32 pos = first;
  while (pos <= limit - keyLen)
  {
    // The table stores PRUint8 values relative to aOffset, so the input has
    // to be rebased the same way before it can be looked up.
    JamoNormMap probe;
    probe.seq[0] = aIn[pos] - aOffset;
    probe.seq[1] = aIn[pos + 1] - aOffset;
    probe.seq[2] = (keyLen == 3) ? (aIn[pos + 2] - aOffset) : 0;

    const JamoNormMap *hit =
      JamoClusterSearch (probe, aClusters, aClustersSize);

    if (hit)
    {
      aIn[pos] = hit->liga + aOffset; // back to absolute code point

      // close the gap left by the replaced elements
      PRInt32 dst = pos + 1;
      for (PRInt32 src = pos + keyLen; src < *aLength; src++, dst++)
        aIn[dst] = aIn[src];

      limit -= (keyLen - 1);
      *aLength -= (keyLen - 1);
    }

    pos++;
  }

  return *aLength - lengthBefore;
}

// Find the first modern precomposed syllable in aIn[0 .. *aLength) and
// replace it, in place, with its L V or L V T jamo sequence. The code units
// after the syllable are shifted up by one or two positions and *aLength
// grows accordingly. Nothing changes if aIn contains no syllable.
//
// maxLength is the capacity of aIn in code units; it must leave room for two
// more than the current length.
// Returns NS_ERROR_INVALID_ARG for a null pointer, an empty sequence or
// insufficient capacity; NS_OK otherwise.
/* static */
nsresult ScanDecomposeSyllable(PRUnichar* aIn, PRInt32 *aLength,
                               const PRInt32 maxLength)
{
  if (!aIn || !aLength || *aLength < 1 || maxLength < *aLength + 2)
    return NS_ERROR_INVALID_ARG;

  PRInt32 i = 0;
  while (i < *aLength && !IS_SYL(aIn[i]))
    i++;

  if (i == *aLength)
    return NS_OK;   // no precomposed syllable present

  // Convert a precomposed syllable to an LV or LVT sequence.
  const PRUint16 sylIndex = aIn[i] - SBASE;
  const PRUint16 tIndex = sylIndex % TCOUNT;      // 0 => no trailing consonant
  const PRInt32 extra = tIndex ? 2 : 1;           // additional slots needed

  // Make room behind the syllable. memmove takes a byte count, so the
  // number of code units has to be scaled by sizeof(PRUnichar).
  memmove(aIn + i + 1 + extra, aIn + i + 1,
          (*aLength - i - 1) * sizeof(PRUnichar));

  aIn[i] = sylIndex / (TCOUNT * VCOUNT) + LBASE;
  aIn[i + 1] = (sylIndex / TCOUNT) % VCOUNT + VBASE;
  if (tIndex)
    aIn[i + 2] = tIndex + TBASE;

  *aLength += extra;

  return NS_OK;
}

/*
 *  1. Normalize (regularize) a jamo sequence to the regular
 *     syllable form defined in Unicode 3.2 section 3.11 to the extent
 *     that it's useful in rendering by render_func's().
 *
 *  2. Replace a compatibly decomposed Jamo sequence (unicode 2.0
 *     definition) with a 'precomposed' Jamo cluster (with codepoint
 *     of its own in U+1100 block). For instance, a seq.
 *     of U+1100, U+1100 is replaced by U+1101. It actually does
 *     more than Unicode 2.0 decomposition map suggests.
 *     For a Jamo cluster made up of three basic Jamos
 *     (e.g. U+1133 : Sios, Piup, Kiyeok), not only
 *     a sequence of Sios(U+1109), Piup(U+1107) and
 *     Kiyeok(U+1100) but also two more sequences,
 *     {U+1132(Sios-Pieup), U+1100(Kiyeok)} and {Sios(U+1109),
 *     U+111E(Piup-Kiyeok)} are mapped to U+1133.
 *
 *  3. The result is returned in a newly allocated PRUnichar array of
 *     (input length + 5) elements whose last element is always 0.
 *     Ownership passes to the caller, which in this file is an
 *     nsXPIDLString filled through getter_Copies.
 *
 *  aInSeq  : input sequence, *aLength code units long
 *  aOutSeq : [out] receives the newly allocated, normalized sequence
 *  aLength : [in/out] length of the input / of the normalized sequence
 *
 *  Returns NS_ERROR_INVALID_ARG for null pointers or a non-positive length,
 *  NS_ERROR_OUT_OF_MEMORY if the allocation fails, NS_OK otherwise.
 */

/* static */
nsresult JamoNormalize(const PRUnichar* aInSeq, PRUnichar** aOutSeq,
                       PRInt32* aLength)
{
  if (!aInSeq || !aOutSeq || !aLength || *aLength <= 0)
    return NS_ERROR_INVALID_ARG;

  // 4 more slots : 2 for Lf and Vf, 2 for decomposing a modern precomposed
  // syllable into a Jamo sequence of LVT?.
  const PRInt32 capacity = *aLength + 4;

  // One further slot holds a terminating 0. The receiving string appears to
  // determine the length of an adopted buffer by scanning for a 0, which
  // would otherwise run past the end of the allocation.
  // NOTE(review): the buffer comes from new[] but is released by the
  // adopting string; confirm that the two allocators are compatible.
  *aOutSeq = new PRUnichar[capacity + 1];
  if (!*aOutSeq)
    return NS_ERROR_OUT_OF_MEMORY;
  memcpy(*aOutSeq, aInSeq, *aLength * sizeof(PRUnichar));
  // Zero the unused tail; nothing below writes at index 'capacity', so the
  // final element stays 0 on every return path.
  memset(*aOutSeq + *aLength, 0,
         (capacity + 1 - *aLength) * sizeof(PRUnichar));

  nsresult rv = ScanDecomposeSyllable(*aOutSeq, aLength, capacity);
  NS_ENSURE_SUCCESS(rv, rv);

  // LV or LVT : no need to search for and replace jamo sequences
  if ((*aLength == 2 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1])) ||
      (*aLength == 3 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1]) &&
      IS_TC((*aOutSeq)[2])))
    return NS_OK;

  // remove Lf in LfL sequence that may occur in an interim cluster during
  // a simple Xkb-based input.
  if ((*aOutSeq)[0] == LFILL && *aLength > 1 && IS_LC((*aOutSeq)[1]))
  {
    memmove (*aOutSeq, *aOutSeq + 1, (*aLength - 1) * sizeof(PRUnichar));
    (*aLength)--;
  }

  if (*aLength > 1)
  {
    JamoSrchReplace (gJamoClustersGroup1,
        sizeof(gJamoClustersGroup1) / sizeof(gJamoClustersGroup1[0]),
        *aOutSeq, aLength, LBASE);
    JamoSrchReplace (gJamoClustersGroup234,
        sizeof(gJamoClustersGroup234) / sizeof(gJamoClustersGroup234[0]),
        *aOutSeq, aLength, LBASE);
  }

  // prepend a leading V with Lf
  if (IS_VO((*aOutSeq)[0]))
  {
    memmove(*aOutSeq + 1, *aOutSeq, *aLength * sizeof(PRUnichar));
    (*aOutSeq)[0] = LFILL;
    (*aLength)++;
  }
  /* prepend a leading T with LfVf */
  else if (IS_TC((*aOutSeq)[0]))
  {
    memmove (*aOutSeq + 2, *aOutSeq, *aLength * sizeof(PRUnichar));
    (*aOutSeq)[0] = LFILL;
    (*aOutSeq)[1] = VFILL;
    *aLength += 2;
  }
  return NS_OK;
}


/*  JamosToExtJamos() :
 *  1. shift jamos to three disjoint code blocks in
 *     PUA (0xF000 for LC, 0xF100 for VO, 0xF200 for TC).
 *  2. replace a jamo sequence with a precomposed extended
 *     cluster jamo code point in PUA
 *  3. both steps work 'in place'; *aLength is reduced by the replacements.
 */

/* static */
void JamosToExtJamos (PRUnichar* aInSeq,  PRInt32* aLength)
{
  // translate jamo code points to temporary code points in PUA;
  // anything that is not a jamo is left alone.
  for (PRInt32 idx = 0; idx < *aLength; idx++)
  {
    const PRUnichar jamo = aInSeq[idx];
    PRUint16 shift = 0;

    if (IS_LC(jamo))
      shift = LC_OFFSET;
    else if (IS_VO(jamo))
      shift = VO_OFFSET;
    else if (IS_TC(jamo))
      shift = TC_OFFSET;

    aInSeq[idx] = jamo + shift;
  }

  // LV or LVT : no need to search for and replace jamo sequences
  const PRBool startsWithLV = *aLength >= 2 &&
                              IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1]);
  if (startsWithLV &&
      (*aLength == 2 || (*aLength == 3 && IS_TC_EXT(aInSeq[2]))))
    return;

  // replace a sequence of Jamos with the corresponding precomposed
  // Jamo cluster in PUA. The order of the passes is significant: for each
  // jamo class, group 1 is applied before group 2.
  const struct {
    const JamoNormMap* table;
    PRUint16 count;
    PRUint16 block;
  } passes[] = {
    { gExtLcClustersGroup1,
      PRUint16(sizeof (gExtLcClustersGroup1) / sizeof (gExtLcClustersGroup1[0])),
      PRUint16(LC_TMPPOS) },
    { gExtLcClustersGroup2,
      PRUint16(sizeof (gExtLcClustersGroup2) / sizeof (gExtLcClustersGroup2[0])),
      PRUint16(LC_TMPPOS) },
    { gExtVoClustersGroup1,
      PRUint16(sizeof (gExtVoClustersGroup1) / sizeof (gExtVoClustersGroup1[0])),
      PRUint16(VO_TMPPOS) },
    { gExtVoClustersGroup2,
      PRUint16(sizeof (gExtVoClustersGroup2) / sizeof (gExtVoClustersGroup2[0])),
      PRUint16(VO_TMPPOS) },
    { gExtTcClustersGroup1,
      PRUint16(sizeof (gExtTcClustersGroup1) / sizeof (gExtTcClustersGroup1[0])),
      PRUint16(TC_TMPPOS) },
    { gExtTcClustersGroup2,
      PRUint16(sizeof (gExtTcClustersGroup2) / sizeof (gExtTcClustersGroup2[0])),
      PRUint16(TC_TMPPOS) }
  };

  for (PRUint32 p = 0; p < sizeof (passes) / sizeof (passes[0]); p++)
    JamoSrchReplace (passes[p].table, passes[p].count,
                     aInSeq, aLength, passes[p].block);
}