move to intl/unicharutil

1999-01-08 00:43:25 +00:00 · 1999-01-08 00:43:25 +00:00 · 38e75adbe9
--- a/modules/unicharutil/Makefile.in
+++ b/modules/unicharutil/Makefile.in
--- a/modules/unicharutil/makefile.win
+++ b/modules/unicharutil/makefile.win
--- a/modules/unicharutil/note.txt
+++ b/modules/unicharutil/note.txt
--- a/modules/unicharutil/tools/MUTTUCData.txt
+++ b/modules/unicharutil/tools/MUTTUCData.txt
@ -1,208 +0,0 @@
-#
-# $Id: MUTTUCData.txt,v 1.1 1999-01-06 01:46:03 ftang%netscape.com Exp $
-#
-# Copyright 1996, 1997, 1998 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Implementation specific character properties.
-#
-#
-# Space, other.
-#
-0009;;Ss;;;;;;;;;;;;
-000A;;Ss;;;;;;;;;;;;
-000B;;Ss;;;;;;;;;;;;
-000C;;Ss;;;;;;;;;;;;
-000D;;Ss;;;;;;;;;;;;
-#
-# Non-breaking.
-#
-00A0;;Nb;;;;;;;;;;;;
-2007;;Nb;;;;;;;;;;;;
-2011;;Nb;;;;;;;;;;;;
-FEFF;;Nb;;;;;;;;;;;;
-#
-# Symmetric.
-#
-0028;;Sy;;;;;;;;;;;;
-0029;;Sy;;;;;;;;;;;;
-005B;;Sy;;;;;;;;;;;;
-005D;;Sy;;;;;;;;;;;;
-007B;;Sy;;;;;;;;;;;;
-007D;;Sy;;;;;;;;;;;;
-00AB;;Sy;;;;;;;;;;;;
-00BB;;Sy;;;;;;;;;;;;
-0F3A;;Sy;;;;;;;;;;;;
-0F3B;;Sy;;;;;;;;;;;;
-0F3C;;Sy;;;;;;;;;;;;
-0F3D;;Sy;;;;;;;;;;;;
-0F3E;;Sy;;;;;;;;;;;;
-0F3F;;Sy;;;;;;;;;;;;
-2018;;Sy;;;;;;;;;;;;
-2019;;Sy;;;;;;;;;;;;
-201A;;Sy;;;;;;;;;;;;
-201B;;Sy;;;;;;;;;;;;
-201C;;Sy;;;;;;;;;;;;
-201D;;Sy;;;;;;;;;;;;
-201E;;Sy;;;;;;;;;;;;
-201F;;Sy;;;;;;;;;;;;
-2039;;Sy;;;;;;;;;;;;
-203A;;Sy;;;;;;;;;;;;
-2045;;Sy;;;;;;;;;;;;
-2046;;Sy;;;;;;;;;;;;
-207D;;Sy;;;;;;;;;;;;
-207E;;Sy;;;;;;;;;;;;
-208D;;Sy;;;;;;;;;;;;
-208E;;Sy;;;;;;;;;;;;
-2329;;Sy;;;;;;;;;;;;
-232A;;Sy;;;;;;;;;;;;
-3008;;Sy;;;;;;;;;;;;
-3009;;Sy;;;;;;;;;;;;
-300A;;Sy;;;;;;;;;;;;
-300B;;Sy;;;;;;;;;;;;
-300C;;Sy;;;;;;;;;;;;
-300D;;Sy;;;;;;;;;;;;
-300E;;Sy;;;;;;;;;;;;
-300F;;Sy;;;;;;;;;;;;
-3010;;Sy;;;;;;;;;;;;
-3011;;Sy;;;;;;;;;;;;
-3014;;Sy;;;;;;;;;;;;
-3015;;Sy;;;;;;;;;;;;
-3016;;Sy;;;;;;;;;;;;
-3017;;Sy;;;;;;;;;;;;
-3018;;Sy;;;;;;;;;;;;
-3019;;Sy;;;;;;;;;;;;
-301A;;Sy;;;;;;;;;;;;
-301B;;Sy;;;;;;;;;;;;
-301D;;Sy;;;;;;;;;;;;
-301E;;Sy;;;;;;;;;;;;
-FD3E;;Sy;;;;;;;;;;;;
-FD3F;;Sy;;;;;;;;;;;;
-FE35;;Sy;;;;;;;;;;;;
-FE36;;Sy;;;;;;;;;;;;
-FE37;;Sy;;;;;;;;;;;;
-FE38;;Sy;;;;;;;;;;;;
-FE39;;Sy;;;;;;;;;;;;
-FE3A;;Sy;;;;;;;;;;;;
-FE3B;;Sy;;;;;;;;;;;;
-FE3C;;Sy;;;;;;;;;;;;
-FE3D;;Sy;;;;;;;;;;;;
-FE3E;;Sy;;;;;;;;;;;;
-FE3F;;Sy;;;;;;;;;;;;
-FE40;;Sy;;;;;;;;;;;;
-FE41;;Sy;;;;;;;;;;;;
-FE42;;Sy;;;;;;;;;;;;
-FE43;;Sy;;;;;;;;;;;;
-FE44;;Sy;;;;;;;;;;;;
-FE59;;Sy;;;;;;;;;;;;
-FE5A;;Sy;;;;;;;;;;;;
-FE5B;;Sy;;;;;;;;;;;;
-FE5C;;Sy;;;;;;;;;;;;
-FE5D;;Sy;;;;;;;;;;;;
-FE5E;;Sy;;;;;;;;;;;;
-FF08;;Sy;;;;;;;;;;;;
-FF09;;Sy;;;;;;;;;;;;
-FF3B;;Sy;;;;;;;;;;;;
-FF3D;;Sy;;;;;;;;;;;;
-FF5B;;Sy;;;;;;;;;;;;
-FF5D;;Sy;;;;;;;;;;;;
-FF62;;Sy;;;;;;;;;;;;
-FF63;;Sy;;;;;;;;;;;;
-#
-# Hex digit.
-#
-0030;;Hd;;;;;;;;;;;;
-0031;;Hd;;;;;;;;;;;;
-0032;;Hd;;;;;;;;;;;;
-0033;;Hd;;;;;;;;;;;;
-0034;;Hd;;;;;;;;;;;;
-0035;;Hd;;;;;;;;;;;;
-0036;;Hd;;;;;;;;;;;;
-0037;;Hd;;;;;;;;;;;;
-0038;;Hd;;;;;;;;;;;;
-0039;;Hd;;;;;;;;;;;;
-0041;;Hd;;;;;;;;;;;;
-0042;;Hd;;;;;;;;;;;;
-0043;;Hd;;;;;;;;;;;;
-0044;;Hd;;;;;;;;;;;;
-0045;;Hd;;;;;;;;;;;;
-0046;;Hd;;;;;;;;;;;;
-0061;;Hd;;;;;;;;;;;;
-0062;;Hd;;;;;;;;;;;;
-0063;;Hd;;;;;;;;;;;;
-0064;;Hd;;;;;;;;;;;;
-0065;;Hd;;;;;;;;;;;;
-0066;;Hd;;;;;;;;;;;;
-FF10;;Hd;;;;;;;;;;;;
-FF11;;Hd;;;;;;;;;;;;
-FF12;;Hd;;;;;;;;;;;;
-FF13;;Hd;;;;;;;;;;;;
-FF14;;Hd;;;;;;;;;;;;
-FF15;;Hd;;;;;;;;;;;;
-FF16;;Hd;;;;;;;;;;;;
-FF17;;Hd;;;;;;;;;;;;
-FF18;;Hd;;;;;;;;;;;;
-FF19;;Hd;;;;;;;;;;;;
-FF21;;Hd;;;;;;;;;;;;
-FF22;;Hd;;;;;;;;;;;;
-FF23;;Hd;;;;;;;;;;;;
-FF24;;Hd;;;;;;;;;;;;
-FF25;;Hd;;;;;;;;;;;;
-FF26;;Hd;;;;;;;;;;;;
-FF41;;Hd;;;;;;;;;;;;
-FF42;;Hd;;;;;;;;;;;;
-FF43;;Hd;;;;;;;;;;;;
-FF44;;Hd;;;;;;;;;;;;
-FF45;;Hd;;;;;;;;;;;;
-FF46;;Hd;;;;;;;;;;;;
-#
-# Quote marks.
-#
-0022;;Qm;;;;;;;;;;;;
-0027;;Qm;;;;;;;;;;;;
-00AB;;Qm;;;;;;;;;;;;
-00BB;;Qm;;;;;;;;;;;;
-2018;;Qm;;;;;;;;;;;;
-2019;;Qm;;;;;;;;;;;;
-201A;;Qm;;;;;;;;;;;;
-201B;;Qm;;;;;;;;;;;;
-201C;;Qm;;;;;;;;;;;;
-201D;;Qm;;;;;;;;;;;;
-201E;;Qm;;;;;;;;;;;;
-201F;;Qm;;;;;;;;;;;;
-2039;;Qm;;;;;;;;;;;;
-203A;;Qm;;;;;;;;;;;;
-300C;;Qm;;;;;;;;;;;;
-300D;;Qm;;;;;;;;;;;;
-300E;;Qm;;;;;;;;;;;;
-300F;;Qm;;;;;;;;;;;;
-301D;;Qm;;;;;;;;;;;;
-301E;;Qm;;;;;;;;;;;;
-301F;;Qm;;;;;;;;;;;;
-FE41;;Qm;;;;;;;;;;;;
-FE42;;Qm;;;;;;;;;;;;
-FE43;;Qm;;;;;;;;;;;;
-FE44;;Qm;;;;;;;;;;;;
-FF02;;Qm;;;;;;;;;;;;
-FF07;;Qm;;;;;;;;;;;;
-FF62;;Qm;;;;;;;;;;;;
-FF63;;Qm;;;;;;;;;;;;
--- a/modules/unicharutil/tools/Makefile.in
+++ b/modules/unicharutil/tools/Makefile.in
--- a/modules/unicharutil/tools/UCDATAREADME.txt
+++ b/modules/unicharutil/tools/UCDATAREADME.txt
@ -1,207 +0,0 @@
-#
-# $Id: UCDATAREADME.txt,v 1.1 1999-01-06 01:46:03 ftang%netscape.com Exp $
-#
-
-                           MUTT UCData Package 1.9
-                           -----------------------
-
-This is a package that supports ctype-like operations for Unicode UCS-2 text
-(and surrogates), case mapping, and decomposition lookup.  To use it, you will
-need to get the "UnicodeData-2.0.14.txt" (or later) file from the Unicode Web
-or FTP site.
-
-This package consists of two parts:
-
-  1. A program called "ucgendat" which generates five data files from the
-     UnicodeData-2.*.txt file.  The files are:
-
-     A. case.dat   - the case mappings.
-     B. ctype.dat  - the character property tables.
-     C. decomp.dat - the character decompositions.
-     D. cmbcl.dat  - the non-zero combining classes.
-     E. num.dat    - the codes representing numbers.
-
-  2. The "ucdata.[ch]" files which implement the functions needed to
-     check to see if a character matches groups of properties, to map between
-     upper, lower, and title case, to look up the decomposition of a
-     character, look up the combining class of a character, and get the number
-     value of a character.
-
-A short reference to the functions available is in the "api.txt" file.
-
-Techie Details
-==============
-
-The "ucgendat" program parses files from the command line which are all in the
-Unicode Character Database (UCDB) format.  An additional properties file,
-"MUTTUCData.txt", provides some extra properties for some characters.
-
-The program looks for the two character properties fields (2 and 4), the
-combining class field (3), the decomposition field (5), the numeric value
-field (8), and the case mapping fields (12, 13, and 14).  The decompositions
-are recursively expanded before being written out.
-
-The decomposition table contains all the canonical decompositions.  This means
-all decompositions that do not have tags such as "<compat>" or "<font>".
-
-The data is almost all stored as unsigned longs (32-bits assumed) and the
-routines that load the data take care of endian swaps when necessary.  This
-also means that surrogates (>= 0x10000) can be placed in the data files the
-"ucgendat" program parses.
-
-The data is written as external files and broken into five parts so it can be
-selectively updated at runtime if necessary.
-
-The data files currently generated by the "ucgendat" program total about 56K
-in size altogether.
-
-The format of the binary data files is documented in the "format.txt" file.
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-13 December 1998
-
-CHANGES
-=======
-
-Version 1.9
-----------
-1. Fixed a problem with an incorrect amount of storage being allocated for the
-   combining class nodes.
-
-2. Fixed an invalid initialization in the number code.
-
-3. Changed the Java template file formatting a bit.
-
-4. Added tables and function for getting decompositions in the Java class.
-
-Version 1.8
-----------
-1. Fixed a problem with adding certain ranges.
-
-2. Added two more macros for testing for identifiers.
-
-3. Tested with the UnicodeData-2.1.5.txt file.
-
-Version 1.7
-----------
-1. Fixed a problem with looking up decompositions in "ucgendat."
-
-Version 1.6
-----------
-1. Added two new properties introduced with UnicodeData-2.1.4.txt.
-
-2. Changed the "ucgendat.c" program a little to automatically align the
-   property data on a 4-byte boundary when new properties are added.
-
-3. Changed the "ucgendat.c" program to only generate canonical
-   decompositions.
-
-4. Added two new macros ucisinitialpunct() and ucisfinalpunct() to check for
-   initial and final punctuation characters.
-
-5. Minor additions and changes to the documentation.
-
-Version 1.5
-----------
-1. Changed all file open calls to include binary mode with "b" for DOS/WIN
-   platforms.
-
-2. Wrapped the unistd.h include so it won't be included when compiled under
-   Win32.
-
-3. Fixed a bad range check for hex digits in ucgendat.c.
-
-4. Fixed a bad endian swap for combining classes.
-
-5. Added code to make a number table and associated lookup functions.
-   Functions added are ucnumber(), ucdigit(), and ucgetnumber().  The last
-   function is to maintain compatibility with John Cowan's "uctype" package.
-
-Version 1.4
-----------
-1. Fixed a bug with adding a range.
-
-2. Fixed a bug with inserting a range in order.
-
-3. Fixed incorrectly specified ucisdefined() and ucisundefined() macros.
-
-4. Added the missing unload for the combining class data.
-
-5. Fixed a bad macro placement in ucisweak().
-
-Version 1.3
-----------
-1. Bug with case mapping calculations fixed.
-
-2. Bug with empty character property entries fixed.
-
-3. Bug with incorrect type in the combining class lookup fixed.
-
-4. Some corrections done to api.txt.
-
-5. Bug in certain character property lookups fixed.
-
-6. Added a character property table that records the defined characters.
-
-7. Replaced ucisunknown() with ucisdefined() and ucisundefined().
-
-Version 1.2
-----------
-1. Added code to ucgendat to generate a combining class table.
-
-2. Fixed an endian problem with the byte count of decompositions.
-
-3. Fixed some minor problems in the "format.txt" file.
-
-4. Removed some bogus "Ss" values from MUTTUCData.txt file.
-
-5. Added API function to get combining class.
-
-6. Changed the open mode to "rb" so binary data files will be opened correctly
-   on DOS/WIN as well as other platforms.
-
-7. Added the "api.txt" file.
-
-Version 1.1
-----------
-1. Added ucisxdigit() which I overlooked.
-
-2. Added UC_LT to the ucisalpha() macro which I overlooked.
-
-3. Changed uciscntrl() to include UC_CF.
-
-4. Added ucisocntrl() and ucfntcntrl() macros.
-
-5. Added a ucisblank() which I overlooked.
-
-6. Added missing properties to ucissymbol() and ucisnumber().
-
-7. Added ucisgraph() and ucisprint().
-
-8. Changed the "Mr" property to "Sy" to mark this subset of mirroring
-   characters as symmetric to avoid trampling the Unicode/ISO10646 sense of
-   mirroring.
-
-9. Added another property called "Ss" which includes control characters
-   traditionally seen as spaces in the isspace() macro.
-
-10. Added a bunch of macros to be API compatible with John Cowan's package.
-
-ACKNOWLEDGEMENTS
-================
-
-Thanks go to John Cowan <cowan@locke.ccil.org> for pointing out lots of
-missing things and giving me stuff, particularly a bunch of new macros.
-
-Thanks go to Bob Verbrugge <bob_verbrugge@nl.compuware.com> for pointing out
-various bugs.
-
-Thanks go to Christophe Pierret <cpierret@businessobjects.com> for pointing
-out that file modes need to have "b" for DOS/WIN machines, pointing out
-unistd.h is not a Win 32 header, and pointing out a problem with ucisalnum().
-
-Thanks go to Kent Johnson <kent@pondview.mv.com> for finding a bug that caused
-incomplete decompositions to be generated by the "ucgendat" program.
-
-Thanks go to Valeriy E. Ushakov <uwe@ptc.spbu.ru> for spotting an allocation
-error and an initialization error.
--- a/modules/unicharutil/tools/data/case.dat
+++ b/modules/unicharutil/tools/data/case.dat
--- a/modules/unicharutil/tools/data/cmbcl.dat
+++ b/modules/unicharutil/tools/data/cmbcl.dat
--- a/modules/unicharutil/tools/data/ctype.dat
+++ b/modules/unicharutil/tools/data/ctype.dat
--- a/modules/unicharutil/tools/data/decomp.dat
+++ b/modules/unicharutil/tools/data/decomp.dat
--- a/modules/unicharutil/tools/data/num.dat
+++ b/modules/unicharutil/tools/data/num.dat
--- a/modules/unicharutil/tools/format.txt
+++ b/modules/unicharutil/tools/format.txt
@ -1,243 +0,0 @@
-#
-# $Id: format.txt,v 1.1 1999-01-06 01:46:03 ftang%netscape.com Exp $
-#
-
-CHARACTER DATA
-==============
-
-This package generates some data files that contain character properties useful
-for text processing.
-
-CHARACTER PROPERTIES
-====================
-
-The first data file is called "ctype.dat" and contains a compressed form of
-the character properties found in the Unicode Character Database (UCDB).
-Additional properties can be specified in limited UCDB format in another file
-to avoid modifying the original UCDB.
-
-The following is a property name and code table to be used with the character
-data:
-
-NAME CODE DESCRIPTION
---------------------
-Mn   0    Mark, Non-Spacing
-Mc   1    Mark, Spacing Combining
-Me   2    Mark, Enclosing
-Nd   3    Number, Decimal Digit
-Nl   4    Number, Letter
-No   5    Number, Other
-Zs   6    Separator, Space
-Zl   7    Separator, Line
-Zp   8    Separator, Paragraph
-Cc   9    Other, Control
-Cf   10   Other, Format
-Cs   11   Other, Surrogate
-Co   12   Other, Private Use
-Cn   13   Other, Not Assigned
-Lu   14   Letter, Uppercase
-Ll   15   Letter, Lowercase
-Lt   16   Letter, Titlecase
-Lm   17   Letter, Modifier
-Lo   18   Letter, Other
-Pc   19   Punctuation, Connector
-Pd   20   Punctuation, Dash
-Ps   21   Punctuation, Open
-Pe   22   Punctuation, Close
-Po   23   Punctuation, Other
-Sm   24   Symbol, Math
-Sc   25   Symbol, Currency
-Sk   26   Symbol, Modifier
-So   27   Symbol, Other
-L    28   Left-To-Right
-R    29   Right-To-Left
-EN   30   European Number
-ES   31   European Number Separator
-ET   32   European Number Terminator
-AN   33   Arabic Number
-CS   34   Common Number Separator
-B    35   Block Separator
-S    36   Segment Separator
-WS   37   Whitespace
-ON   38   Other Neutrals
-Pi   47   Punctuation, Initial
-Pf   48   Punctuation, Final
-#
-# Implementation specific properties.
-#
-Cm   39   Composite
-Nb   40   Non-Breaking
-Sy   41   Symmetric (characters which are part of open/close pairs)
-Hd   42   Hex Digit
-Qm   43   Quote Mark
-Mr   44   Mirroring
-Ss   45   Space, Other (controls viewed as spaces in ctype isspace())
-Cp   46   Defined character
-
-The actual binary data is formatted as follows:
-
-  Assumptions: unsigned short is at least 16 bits in size and unsigned long
-               is at least 32 bits in size.
-
-    unsigned short ByteOrderMark
-    unsigned short OffsetArraySize
-    unsigned long  Bytes
-    unsigned short Offsets[OffsetArraySize + 1]
-    unsigned long  Ranges[N], N = value of Offsets[OffsetArraySize]
-
-  The Bytes field provides the total byte count used for the Offsets[] and
-  Ranges[] arrays.  The Offsets[] array is aligned on a 4-byte boundary and
-  there is always one extra node on the end to hold the final index of the
-  Ranges[] array.  The Ranges[] array contains pairs of 4-byte values
-  representing a range of Unicode characters.  The pairs are arranged in
-  increasing order by the first character code in the range.
-
-  Determining if a particular character is in the property list requires a
-  simple binary search to determine if a character is in any of the ranges
-  for the property.
-
-  If the ByteOrderMark is equal to 0xFFFE, then the data was generated on a
-  machine with a different endian order and the values must be byte-swapped.
-
-  To swap a 16-bit value:
-     c = (c >> 8) | ((c & 0xff) << 8)
-
-  To swap a 32-bit value:
-     c = ((c & 0xff) << 24) | (((c >> 8) & 0xff) << 16) |
-         (((c >> 16) & 0xff) << 8) | (c >> 24)
-
-CASE MAPPINGS
-=============
-
-The next data file is called "case.dat" and contains three case mapping tables
-in the following order: upper, lower, and title case.  Each table is in
-increasing order by character code and each mapping contains 3 unsigned longs
-which represent the possible mappings.
-
-The format for the binary form of these tables is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumMappingNodes, count of all mapping nodes
-  unsigned short CaseTableSizes[2], upper and lower mapping node counts
-  unsigned long  CaseTables[NumMappingNodes]
-
-  The starting indexes of the case tables are calculated as follows:
-
-    UpperIndex = 0;
-    LowerIndex = CaseTableSizes[0] * 3;
-    TitleIndex = LowerIndex + CaseTableSizes[1] * 3;
-
-  The order of the fields for the three tables is:
-
-    Upper case
-    ----------
-    unsigned long upper;
-    unsigned long lower;
-    unsigned long title;
-
-    Lower case
-    ----------
-    unsigned long lower;
-    unsigned long upper;
-    unsigned long title;
-
-    Title case
-    ----------
-    unsigned long title;
-    unsigned long upper;
-    unsigned long lower;
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  Because the tables are in increasing order by character code, locating a
-  mapping requires a simple binary search on one of the 3 codes that make up
-  each node.
-
-  It is important to note that there can only be 65536 mapping nodes, which,
-  divided into 3 portions, allows 21845 nodes for each case mapping table.  The
-  number of mappings in any one table may be more or less than 21845, but only
-  65536 are allowed in total.
-
-DECOMPOSITIONS
-==============
-
-The next data file is called "decomp.dat" and contains the decomposition data
-for all characters whose decompositions contain more than one character and
-are *not* compatibility decompositions.  Compatibility decompositions are
-signaled in the UCDB format by the use of the <compat> tag in the
-decomposition field.  Each list of character codes represents a full
-decomposition of a composite character.  The nodes are arranged in increasing
-order by character code.
-
-The format for the binary form of this table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumDecompNodes, count of all decomposition nodes
-  unsigned long  Bytes
-  unsigned long  DecompNodes[(NumDecompNodes * 2) + 1]
-  unsigned long  Decomp[N], N = sum of all counts in DecompNodes[]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The DecompNodes[] array consists of pairs of unsigned longs, the first of
-  which is the character code and the second is the initial index of the list
-  of character codes representing the decomposition.
-
-  Locating the decomposition of a composite character requires a binary search
-  for a character code in the DecompNodes[] array and using its index to
-  locate the start of the decomposition.  The length of the decomposition list
-  is the index stored in the following element of DecompNodes[] minus the
-  current index.
-
-COMBINING CLASSES
-=================
-
-The fourth data file is called "cmbcl.dat" and contains the characters with
-non-zero combining classes.
-
-The format for the binary form of this table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumCCLNodes
-  unsigned long  Bytes
-  unsigned long  CCLNodes[NumCCLNodes * 3]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The CCLNodes[] array consists of groups of three unsigned longs.  The first
-  and second are the beginning and ending of a range and the third is the
-  combining class of that range.
-
-  If a character is not found in this table, then the combining class is
-  assumed to be 0.
-
-  It is important to note that only 65536 distinct range/combining-class nodes
-  can be specified because NumCCLNodes is usually a 16-bit number.
-
-NUMBER TABLE
-============
-
-The final data file is called "num.dat" and contains the characters that have
-a numeric value associated with them.
-
-The format for the binary form of the table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumNumberNodes
-  unsigned long  Bytes
-  unsigned long  NumberNodes[NumNumberNodes]
-  unsigned short ValueNodes[(Bytes - (NumNumberNodes * sizeof(unsigned long)))
-                            / sizeof(short)]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The NumberNodes array contains pairs of values, the first of which is the
-  character code and the second an index into the ValueNodes array.  The
-  ValueNodes array contains pairs of integers which represent the numerator
-  and denominator of the numeric value of the character.  If the character
-  happens to map to an integer, both the values in ValueNodes will be the
-  same.
--- a/modules/unicharutil/tools/makefile.win
+++ b/modules/unicharutil/tools/makefile.win
--- a/modules/unicharutil/tools/ucgendat.c
+++ b/modules/unicharutil/tools/ucgendat.c