Bug 1402090 - Remove obsolete ucdata code/tools, no longer used in the mozilla build. r=m_kato

2017-09-22 10:37:17 +01:00 · 2017-09-22 10:37:17 +01:00 · cc79ecacc0
--- a/intl/unicharutil/tools/MUTTUCData.txt
+++ b/intl/unicharutil/tools/MUTTUCData.txt
@ -1,208 +0,0 @@
-#
-# $Id: MUTTUCData.txt,v 1.1 1999/01/08 00:19:19 ftang%netscape.com Exp $
-#
-# Copyright 1996, 1997, 1998 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-#
-# Implementation specific character properties.
-#
-#
-# Space, other.
-#
-0009;;Ss;;;;;;;;;;;;
-000A;;Ss;;;;;;;;;;;;
-000B;;Ss;;;;;;;;;;;;
-000C;;Ss;;;;;;;;;;;;
-000D;;Ss;;;;;;;;;;;;
-#
-# Non-breaking.
-#
-00A0;;Nb;;;;;;;;;;;;
-2007;;Nb;;;;;;;;;;;;
-2011;;Nb;;;;;;;;;;;;
-FEFF;;Nb;;;;;;;;;;;;
-#
-# Symmetric.
-#
-0028;;Sy;;;;;;;;;;;;
-0029;;Sy;;;;;;;;;;;;
-005B;;Sy;;;;;;;;;;;;
-005D;;Sy;;;;;;;;;;;;
-007B;;Sy;;;;;;;;;;;;
-007D;;Sy;;;;;;;;;;;;
-00AB;;Sy;;;;;;;;;;;;
-00BB;;Sy;;;;;;;;;;;;
-0F3A;;Sy;;;;;;;;;;;;
-0F3B;;Sy;;;;;;;;;;;;
-0F3C;;Sy;;;;;;;;;;;;
-0F3D;;Sy;;;;;;;;;;;;
-0F3E;;Sy;;;;;;;;;;;;
-0F3F;;Sy;;;;;;;;;;;;
-2018;;Sy;;;;;;;;;;;;
-2019;;Sy;;;;;;;;;;;;
-201A;;Sy;;;;;;;;;;;;
-201B;;Sy;;;;;;;;;;;;
-201C;;Sy;;;;;;;;;;;;
-201D;;Sy;;;;;;;;;;;;
-201E;;Sy;;;;;;;;;;;;
-201F;;Sy;;;;;;;;;;;;
-2039;;Sy;;;;;;;;;;;;
-203A;;Sy;;;;;;;;;;;;
-2045;;Sy;;;;;;;;;;;;
-2046;;Sy;;;;;;;;;;;;
-207D;;Sy;;;;;;;;;;;;
-207E;;Sy;;;;;;;;;;;;
-208D;;Sy;;;;;;;;;;;;
-208E;;Sy;;;;;;;;;;;;
-2329;;Sy;;;;;;;;;;;;
-232A;;Sy;;;;;;;;;;;;
-3008;;Sy;;;;;;;;;;;;
-3009;;Sy;;;;;;;;;;;;
-300A;;Sy;;;;;;;;;;;;
-300B;;Sy;;;;;;;;;;;;
-300C;;Sy;;;;;;;;;;;;
-300D;;Sy;;;;;;;;;;;;
-300E;;Sy;;;;;;;;;;;;
-300F;;Sy;;;;;;;;;;;;
-3010;;Sy;;;;;;;;;;;;
-3011;;Sy;;;;;;;;;;;;
-3014;;Sy;;;;;;;;;;;;
-3015;;Sy;;;;;;;;;;;;
-3016;;Sy;;;;;;;;;;;;
-3017;;Sy;;;;;;;;;;;;
-3018;;Sy;;;;;;;;;;;;
-3019;;Sy;;;;;;;;;;;;
-301A;;Sy;;;;;;;;;;;;
-301B;;Sy;;;;;;;;;;;;
-301D;;Sy;;;;;;;;;;;;
-301E;;Sy;;;;;;;;;;;;
-FD3E;;Sy;;;;;;;;;;;;
-FD3F;;Sy;;;;;;;;;;;;
-FE35;;Sy;;;;;;;;;;;;
-FE36;;Sy;;;;;;;;;;;;
-FE37;;Sy;;;;;;;;;;;;
-FE38;;Sy;;;;;;;;;;;;
-FE39;;Sy;;;;;;;;;;;;
-FE3A;;Sy;;;;;;;;;;;;
-FE3B;;Sy;;;;;;;;;;;;
-FE3C;;Sy;;;;;;;;;;;;
-FE3D;;Sy;;;;;;;;;;;;
-FE3E;;Sy;;;;;;;;;;;;
-FE3F;;Sy;;;;;;;;;;;;
-FE40;;Sy;;;;;;;;;;;;
-FE41;;Sy;;;;;;;;;;;;
-FE42;;Sy;;;;;;;;;;;;
-FE43;;Sy;;;;;;;;;;;;
-FE44;;Sy;;;;;;;;;;;;
-FE59;;Sy;;;;;;;;;;;;
-FE5A;;Sy;;;;;;;;;;;;
-FE5B;;Sy;;;;;;;;;;;;
-FE5C;;Sy;;;;;;;;;;;;
-FE5D;;Sy;;;;;;;;;;;;
-FE5E;;Sy;;;;;;;;;;;;
-FF08;;Sy;;;;;;;;;;;;
-FF09;;Sy;;;;;;;;;;;;
-FF3B;;Sy;;;;;;;;;;;;
-FF3D;;Sy;;;;;;;;;;;;
-FF5B;;Sy;;;;;;;;;;;;
-FF5D;;Sy;;;;;;;;;;;;
-FF62;;Sy;;;;;;;;;;;;
-FF63;;Sy;;;;;;;;;;;;
-#
-# Hex digit.
-#
-0030;;Hd;;;;;;;;;;;;
-0031;;Hd;;;;;;;;;;;;
-0032;;Hd;;;;;;;;;;;;
-0033;;Hd;;;;;;;;;;;;
-0034;;Hd;;;;;;;;;;;;
-0035;;Hd;;;;;;;;;;;;
-0036;;Hd;;;;;;;;;;;;
-0037;;Hd;;;;;;;;;;;;
-0038;;Hd;;;;;;;;;;;;
-0039;;Hd;;;;;;;;;;;;
-0041;;Hd;;;;;;;;;;;;
-0042;;Hd;;;;;;;;;;;;
-0043;;Hd;;;;;;;;;;;;
-0044;;Hd;;;;;;;;;;;;
-0045;;Hd;;;;;;;;;;;;
-0046;;Hd;;;;;;;;;;;;
-0061;;Hd;;;;;;;;;;;;
-0062;;Hd;;;;;;;;;;;;
-0063;;Hd;;;;;;;;;;;;
-0064;;Hd;;;;;;;;;;;;
-0065;;Hd;;;;;;;;;;;;
-0066;;Hd;;;;;;;;;;;;
-FF10;;Hd;;;;;;;;;;;;
-FF11;;Hd;;;;;;;;;;;;
-FF12;;Hd;;;;;;;;;;;;
-FF13;;Hd;;;;;;;;;;;;
-FF14;;Hd;;;;;;;;;;;;
-FF15;;Hd;;;;;;;;;;;;
-FF16;;Hd;;;;;;;;;;;;
-FF17;;Hd;;;;;;;;;;;;
-FF18;;Hd;;;;;;;;;;;;
-FF19;;Hd;;;;;;;;;;;;
-FF21;;Hd;;;;;;;;;;;;
-FF22;;Hd;;;;;;;;;;;;
-FF23;;Hd;;;;;;;;;;;;
-FF24;;Hd;;;;;;;;;;;;
-FF25;;Hd;;;;;;;;;;;;
-FF26;;Hd;;;;;;;;;;;;
-FF41;;Hd;;;;;;;;;;;;
-FF42;;Hd;;;;;;;;;;;;
-FF43;;Hd;;;;;;;;;;;;
-FF44;;Hd;;;;;;;;;;;;
-FF45;;Hd;;;;;;;;;;;;
-FF46;;Hd;;;;;;;;;;;;
-#
-# Quote marks.
-#
-0022;;Qm;;;;;;;;;;;;
-0027;;Qm;;;;;;;;;;;;
-00AB;;Qm;;;;;;;;;;;;
-00BB;;Qm;;;;;;;;;;;;
-2018;;Qm;;;;;;;;;;;;
-2019;;Qm;;;;;;;;;;;;
-201A;;Qm;;;;;;;;;;;;
-201B;;Qm;;;;;;;;;;;;
-201C;;Qm;;;;;;;;;;;;
-201D;;Qm;;;;;;;;;;;;
-201E;;Qm;;;;;;;;;;;;
-201F;;Qm;;;;;;;;;;;;
-2039;;Qm;;;;;;;;;;;;
-203A;;Qm;;;;;;;;;;;;
-300C;;Qm;;;;;;;;;;;;
-300D;;Qm;;;;;;;;;;;;
-300E;;Qm;;;;;;;;;;;;
-300F;;Qm;;;;;;;;;;;;
-301D;;Qm;;;;;;;;;;;;
-301E;;Qm;;;;;;;;;;;;
-301F;;Qm;;;;;;;;;;;;
-FE41;;Qm;;;;;;;;;;;;
-FE42;;Qm;;;;;;;;;;;;
-FE43;;Qm;;;;;;;;;;;;
-FE44;;Qm;;;;;;;;;;;;
-FF02;;Qm;;;;;;;;;;;;
-FF07;;Qm;;;;;;;;;;;;
-FF62;;Qm;;;;;;;;;;;;
-FF63;;Qm;;;;;;;;;;;;
--- a/intl/unicharutil/tools/UCDATAREADME.txt
+++ b/intl/unicharutil/tools/UCDATAREADME.txt
@ -1,207 +0,0 @@
-#
-# $Id: UCDATAREADME.txt,v 1.1 1999/01/08 00:19:20 ftang%netscape.com Exp $
-#
-
-                           MUTT UCData Package 1.9
-                           -----------------------
-
-This is a package that supports ctype-like operations for Unicode UCS-2 text
-(and surrogates), case mapping, and decomposition lookup.  To use it, you will
-need to get the "UnicodeData-2.0.14.txt" (or later) file from the Unicode Web
-or FTP site.
-
-This package consists of two parts:
-
-  1. A program called "ucgendat" which generates five data files from the
-     UnicodeData-2.*.txt file.  The files are:
-
-     A. case.dat   - the case mappings.
-     B. ctype.dat  - the character property tables.
-     C. decomp.dat - the character decompositions.
-     D. cmbcl.dat  - the non-zero combining classes.
-     E. num.dat    - the codes representing numbers.
-
-  2. The "ucdata.[ch]" files which implement the functions needed to
-     check to see if a character matches groups of properties, to map between
-     upper, lower, and title case, to look up the decomposition of a
-     character, look up the combining class of a character, and get the number
-     value of a character.
-
-A short reference to the functions available is in the "api.txt" file.
-
-Techie Details
-==============
-
-The "ucgendat" program parses files from the command line which are all in the
-Unicode Character Database (UCDB) format.  An additional properties file,
-"MUTTUCData.txt", provides some extra properties for some characters.
-
-The program looks for the two character properties fields (2 and 4), the
-combining class field (3), the decomposition field (5), the numeric value
-field (8), and the case mapping fields (12, 13, and 14).  The decompositions
-are recursively expanded before being written out.
-
-The decomposition table contains all the canonical decompositions.  This means
-all decompositions that do not have tags such as "<compat>" or "<font>".
-
-The data is almost all stored as unsigned longs (32-bits assumed) and the
-routines that load the data take care of endian swaps when necessary.  This
-also means that surrogates (>= 0x10000) can be placed in the data files the
-"ucgendat" program parses.
-
-The data is written as external files and broken into five parts so it can be
-selectively updated at runtime if necessary.
-
-The data files currently generated from the "ucgendat" program total about 56K
-in size all together.
-
-The format of the binary data files is documented in the "format.txt" file.
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-13 December 1998
-
-CHANGES
-=======
-
-Version 1.9
-----------
-1. Fixed a problem with an incorrect amount of storage being allocated for the
-   combining class nodes.
-
-2. Fixed an invalid initialization in the number code.
-
-3. Changed the Java template file formatting a bit.
-
-4. Added tables and function for getting decompositions in the Java class.
-
-Version 1.8
-----------
-1. Fixed a problem with adding certain ranges.
-
-2. Added two more macros for testing for identifiers.
-
-3. Tested with the UnicodeData-2.1.5.txt file.
-
-Version 1.7
-----------
-1. Fixed a problem with looking up decompositions in "ucgendat."
-
-Version 1.6
-----------
-1. Added two new properties introduced with UnicodeData-2.1.4.txt.
-
-2. Changed the "ucgendat.c" program a little to automatically align the
-   property data on a 4-byte boundary when new properties are added.
-
-3. Changed the "ucgendat.c" program to only generate canonical
-   decompositions.
-
-4. Added two new macros ucisinitialpunct() and ucisfinalpunct() to check for
-   initial and final punctuation characters.
-
-5. Minor additions and changes to the documentation.
-
-Version 1.5
-----------
-1. Changed all file open calls to include binary mode with "b" for DOS/WIN
-   platforms.
-
-2. Wrapped the unistd.h include so it won't be included when compiled under
-   Win32.
-
-3. Fixed a bad range check for hex digits in ucgendat.c.
-
-4. Fixed a bad endian swap for combining classes.
-
-5. Added code to make a number table and associated lookup functions.
-   Functions added are ucnumber(), ucdigit(), and ucgetnumber().  The last
-   function is to maintain compatibility with John Cowan's "uctype" package.
-
-Version 1.4
-----------
-1. Fixed a bug with adding a range.
-
-2. Fixed a bug with inserting a range in order.
-
-3. Fixed incorrectly specified ucisdefined() and ucisundefined() macros.
-
-4. Added the missing unload for the combining class data.
-
-5. Fixed a bad macro placement in ucisweak().
-
-Version 1.3
-----------
-1. Bug with case mapping calculations fixed.
-
-2. Bug with empty character property entries fixed.
-
-3. Bug with incorrect type in the combining class lookup fixed.
-
-4. Some corrections done to api.txt.
-
-5. Bug in certain character property lookups fixed.
-
-6. Added a character property table that records the defined characters.
-
-7. Replaced ucisunknown() with ucisdefined() and ucisundefined().
-
-Version 1.2
-----------
-1. Added code to ucgendat to generate a combining class table.
-
-2. Fixed an endian problem with the byte count of decompositions.
-
-3. Fixed some minor problems in the "format.txt" file.
-
-4. Removed some bogus "Ss" values from MUTTUCData.txt file.
-
-5. Added API function to get combining class.
-
-6. Changed the open mode to "rb" so binary data files will be opened correctly
-   on DOS/WIN as well as other platforms.
-
-7. Added the "api.txt" file.
-
-Version 1.1
-----------
-1. Added ucisxdigit() which I overlooked.
-
-2. Added UC_LT to the ucisalpha() macro which I overlooked.
-
-3. Changed uciscntrl() to include UC_CF.
-
-4. Added ucisisocntrl() and ucisfmtcntrl() macros.
-
-5. Added a ucisblank() which I overlooked.
-
-6. Added missing properties to ucissymbol() and ucisnumber().
-
-7. Added ucisgraph() and ucisprint().
-
-8. Changed the "Mr" property to "Sy" to mark this subset of mirroring
-   characters as symmetric to avoid trampling the Unicode/ISO10646 sense of
-   mirroring.
-
-9. Added another property called "Ss" which includes control characters
-   traditionally seen as spaces in the isspace() macro.
-
-10. Added a bunch of macros to be API compatible with John Cowan's package.
-
-ACKNOWLEDGEMENTS
-================
-
-Thanks go to John Cowan <cowan@locke.ccil.org> for pointing out lots of
-missing things and giving me stuff, particularly a bunch of new macros.
-
-Thanks go to Bob Verbrugge <bob_verbrugge@nl.compuware.com> for pointing out
-various bugs.
-
-Thanks go to Christophe Pierret <cpierret@businessobjects.com> for pointing
-out that file modes need to have "b" for DOS/WIN machines, pointing out
-unistd.h is not a Win 32 header, and pointing out a problem with ucisalnum().
-
-Thanks go to Kent Johnson <kent@pondview.mv.com> for finding a bug that caused
-incomplete decompositions to be generated by the "ucgendat" program.
-
-Thanks go to Valeriy E. Ushakov <uwe@ptc.spbu.ru> for spotting an allocation
-error and an initialization error.
--- a/intl/unicharutil/tools/data/case.dat
+++ b/intl/unicharutil/tools/data/case.dat
--- a/intl/unicharutil/tools/data/cmbcl.dat
+++ b/intl/unicharutil/tools/data/cmbcl.dat
--- a/intl/unicharutil/tools/data/ctype.dat
+++ b/intl/unicharutil/tools/data/ctype.dat
--- a/intl/unicharutil/tools/data/decomp.dat
+++ b/intl/unicharutil/tools/data/decomp.dat
--- a/intl/unicharutil/tools/data/num.dat
+++ b/intl/unicharutil/tools/data/num.dat
--- a/intl/unicharutil/tools/format.txt
+++ b/intl/unicharutil/tools/format.txt
@ -1,243 +0,0 @@
-#
-# $Id: format.txt,v 1.1 1999/01/08 00:19:20 ftang%netscape.com Exp $
-#
-
-CHARACTER DATA
-==============
-
-This package generates some data files that contain character properties useful
-for text processing.
-
-CHARACTER PROPERTIES
-====================
-
-The first data file is called "ctype.dat" and contains a compressed form of
-the character properties found in the Unicode Character Database (UCDB).
-Additional properties can be specified in limited UCDB format in another file
-to avoid modifying the original UCDB.
-
-The following is a property name and code table to be used with the character
-data:
-
-NAME CODE DESCRIPTION
---------------------
-Mn   0    Mark, Non-Spacing
-Mc   1    Mark, Spacing Combining
-Me   2    Mark, Enclosing
-Nd   3    Number, Decimal Digit
-Nl   4    Number, Letter
-No   5    Number, Other
-Zs   6    Separator, Space
-Zl   7    Separator, Line
-Zp   8    Separator, Paragraph
-Cc   9    Other, Control
-Cf   10   Other, Format
-Cs   11   Other, Surrogate
-Co   12   Other, Private Use
-Cn   13   Other, Not Assigned
-Lu   14   Letter, Uppercase
-Ll   15   Letter, Lowercase
-Lt   16   Letter, Titlecase
-Lm   17   Letter, Modifier
-Lo   18   Letter, Other
-Pc   19   Punctuation, Connector
-Pd   20   Punctuation, Dash
-Ps   21   Punctuation, Open
-Pe   22   Punctuation, Close
-Po   23   Punctuation, Other
-Sm   24   Symbol, Math
-Sc   25   Symbol, Currency
-Sk   26   Symbol, Modifier
-So   27   Symbol, Other
-L    28   Left-To-Right
-R    29   Right-To-Left
-EN   30   European Number
-ES   31   European Number Separator
-ET   32   European Number Terminator
-AN   33   Arabic Number
-CS   34   Common Number Separator
-B    35   Block Separator
-S    36   Segment Separator
-WS   37   Whitespace
-ON   38   Other Neutrals
-Pi   47   Punctuation, Initial
-Pf   48   Punctuation, Final
-#
-# Implementation specific properties.
-#
-Cm   39   Composite
-Nb   40   Non-Breaking
-Sy   41   Symmetric (characters which are part of open/close pairs)
-Hd   42   Hex Digit
-Qm   43   Quote Mark
-Mr   44   Mirroring
-Ss   45   Space, Other (controls viewed as spaces in ctype isspace())
-Cp   46   Defined character
-
-The actual binary data is formatted as follows:
-
-  Assumptions: unsigned short is at least 16-bits in size and unsigned long
-               is at least 32-bits in size.
-
-    unsigned short ByteOrderMark
-    unsigned short OffsetArraySize
-    unsigned long  Bytes
-    unsigned short Offsets[OffsetArraySize + 1]
-    unsigned long  Ranges[N], N = value of Offsets[OffsetArraySize]
-
-  The Bytes field provides the total byte count used for the Offsets[] and
-  Ranges[] arrays.  The Offsets[] array is aligned on a 4-byte boundary and
-  there is always one extra node on the end to hold the final index of the
-  Ranges[] array.  The Ranges[] array contains pairs of 4-byte values
-  representing a range of Unicode characters.  The pairs are arranged in
-  increasing order by the first character code in the range.
-
-  Determining if a particular character is in the property list requires a
-  simple binary search to determine if a character is in any of the ranges
-  for the property.
-
-  If the ByteOrderMark is equal to 0xFFFE, then the data was generated on a
-  machine with a different endian order and the values must be byte-swapped.
-
-  To swap a 16-bit value:
-     c = (c >> 8) | ((c & 0xff) << 8)
-
-  To swap a 32-bit value:
-     c = ((c & 0xff) << 24) | (((c >> 8) & 0xff) << 16) |
-         (((c >> 16) & 0xff) << 8) | (c >> 24)
-
-CASE MAPPINGS
-=============
-
-The next data file is called "case.dat" and contains three case mapping tables
-in the following order: upper, lower, and title case.  Each table is in
-increasing order by character code and each mapping contains 3 unsigned longs
-which represent the possible mappings.
-
-The format for the binary form of these tables is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumMappingNodes, count of all mapping nodes
-  unsigned short CaseTableSizes[2], upper and lower mapping node counts
-  unsigned long  CaseTables[NumMappingNodes]
-
-  The starting indexes of the case tables are calculated as follows:
-
-    UpperIndex = 0;
-    LowerIndex = CaseTableSizes[0] * 3;
-    TitleIndex = LowerIndex + CaseTableSizes[1] * 3;
-
-  The order of the fields for the three tables is:
-
-    Upper case
-    ----------
-    unsigned long upper;
-    unsigned long lower;
-    unsigned long title;
-
-    Lower case
-    ----------
-    unsigned long lower;
-    unsigned long upper;
-    unsigned long title;
-
-    Title case
-    ----------
-    unsigned long title;
-    unsigned long upper;
-    unsigned long lower;
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  Because the tables are in increasing order by character code, locating a
-  mapping requires a simple binary search on one of the 3 codes that make up
-  each node.
-
-  It is important to note that there can only be 65536 mapping nodes, which,
-  divided into 3 portions, allows 21845 nodes for each case mapping table.  The
-  distribution of mappings may be more or less than 21845 per table, but only
-  65536 are allowed in total.
-
-DECOMPOSITIONS
-==============
-
-The next data file is called "decomp.dat" and contains the decomposition data
-for all characters whose decompositions contain more than one character and
-are *not* compatibility decompositions.  Compatibility decompositions are
-signaled in the UCDB format by the use of the <compat> tag in the
-decomposition field.  Each list of character codes represents a full
-decomposition of a composite character.  The nodes are arranged in increasing
-order by character code.
-
-The format for the binary form of this table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumDecompNodes, count of all decomposition nodes
-  unsigned long  Bytes
-  unsigned long  DecompNodes[(NumDecompNodes * 2) + 1]
-  unsigned long  Decomp[N], N = sum of all counts in DecompNodes[]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The DecompNodes[] array consists of pairs of unsigned longs, the first of
-  which is the character code and the second is the initial index of the list
-  of character codes representing the decomposition.
-
-  Locating the decomposition of a composite character requires a binary search
-  for a character code in the DecompNodes[] array and using its index to
-  locate the start of the decomposition.  The length of the decomposition list
-  is the index in the following element in DecompNodes[] minus the current
-  index.
-
-COMBINING CLASSES
-=================
-
-The fourth data file is called "cmbcl.dat" and contains the characters with
-non-zero combining classes.
-
-The format for the binary form of this table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumCCLNodes
-  unsigned long  Bytes
-  unsigned long  CCLNodes[NumCCLNodes * 3]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The CCLNodes[] array consists of groups of three unsigned longs.  The first
-  and second are the beginning and ending of a range and the third is the
-  combining class of that range.
-
-  If a character is not found in this table, then the combining class is
-  assumed to be 0.
-
-  It is important to note that only 65536 distinct ranges plus combining class
-  can be specified because the NumCCLNodes is usually a 16-bit number.
-
-NUMBER TABLE
-============
-
-The final data file is called "num.dat" and contains the characters that have
-a numeric value associated with them.
-
-The format for the binary form of the table is:
-
-  unsigned short ByteOrderMark
-  unsigned short NumNumberNodes
-  unsigned long  Bytes
-  unsigned long  NumberNodes[NumNumberNodes]
-  unsigned short ValueNodes[(Bytes - (NumNumberNodes * sizeof(unsigned long)))
-                            / sizeof(short)]
-
-  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
-  same way as described in the CHARACTER PROPERTIES section.
-
-  The NumberNodes array contains pairs of values, the first of which is the
-  character code and the second an index into the ValueNodes array.  The
-  ValueNodes array contains pairs of integers which represent the numerator
-  and denominator of the numeric value of the character.  If the character
-  happens to map to an integer, both the values in ValueNodes will be the
-  same.
--- a/intl/unicharutil/tools/moz.build
+++ b/intl/unicharutil/tools/moz.build
@ -1,12 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# vim: set filetype=python:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-Program('ucgendat')
-
-SOURCES += [
-    'ucgendat.c',
-]
-
--- a/intl/unicharutil/tools/ucgendat.c
+++ b/intl/unicharutil/tools/ucgendat.c
--- a/intl/unicharutil/ucdata.c
+++ b/intl/unicharutil/ucdata.c
--- a/intl/unicharutil/ucdata.h
+++ b/intl/unicharutil/ucdata.h
@ -1,306 +0,0 @@
-/*
- * Copyright 1996, 1997, 1998 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef _h_ucdata
-#define _h_ucdata
-
-/*
- * $Id: ucdata.h,v 1.1 1999/01/08 00:19:12 ftang%netscape.com Exp $
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#undef __
-#ifdef __STDC__
-#define __(x) x
-#else
-#define __(x) ()
-#endif
-
-#define UCDATA_VERSION "1.9"
-
-/**************************************************************************
- *
- * Masks and macros for character properties.
- *
- **************************************************************************/
-
-/*
- * Values that can appear in the `mask1' parameter of the ucisprop()
- * function.
- */
-#define UC_MN 0x00000001 /* Mark, Non-Spacing          */
-#define UC_MC 0x00000002 /* Mark, Spacing Combining    */
-#define UC_ME 0x00000004 /* Mark, Enclosing            */
-#define UC_ND 0x00000008 /* Number, Decimal Digit      */
-#define UC_NL 0x00000010 /* Number, Letter             */
-#define UC_NO 0x00000020 /* Number, Other              */
-#define UC_ZS 0x00000040 /* Separator, Space           */
-#define UC_ZL 0x00000080 /* Separator, Line            */
-#define UC_ZP 0x00000100 /* Separator, Paragraph       */
-#define UC_CC 0x00000200 /* Other, Control             */
-#define UC_CF 0x00000400 /* Other, Format              */
-#define UC_OS 0x00000800 /* Other, Surrogate           */
-#define UC_CO 0x00001000 /* Other, Private Use         */
-#define UC_CN 0x00002000 /* Other, Not Assigned        */
-#define UC_LU 0x00004000 /* Letter, Uppercase          */
-#define UC_LL 0x00008000 /* Letter, Lowercase          */
-#define UC_LT 0x00010000 /* Letter, Titlecase          */
-#define UC_LM 0x00020000 /* Letter, Modifier           */
-#define UC_LO 0x00040000 /* Letter, Other              */
-#define UC_PC 0x00080000 /* Punctuation, Connector     */
-#define UC_PD 0x00100000 /* Punctuation, Dash          */
-#define UC_PS 0x00200000 /* Punctuation, Open          */
-#define UC_PE 0x00400000 /* Punctuation, Close         */
-#define UC_PO 0x00800000 /* Punctuation, Other         */
-#define UC_SM 0x01000000 /* Symbol, Math               */
-#define UC_SC 0x02000000 /* Symbol, Currency           */
-#define UC_SK 0x04000000 /* Symbol, Modifier           */
-#define UC_SO 0x08000000 /* Symbol, Other              */
-#define UC_L  0x10000000 /* Left-To-Right              */
-#define UC_R  0x20000000 /* Right-To-Left              */
-#define UC_EN 0x40000000 /* European Number            */
-#define UC_ES 0x80000000 /* European Number Separator  */
-
-/*
- * Values that can appear in the `mask2' parameter of the ucisprop()
- * function.
- */
-#define UC_ET 0x00000001 /* European Number Terminator */
-#define UC_AN 0x00000002 /* Arabic Number              */
-#define UC_CS 0x00000004 /* Common Number Separator    */
-#define UC_B  0x00000008 /* Block Separator            */
-#define UC_S  0x00000010 /* Segment Separator          */
-#define UC_WS 0x00000020 /* Whitespace                 */
-#define UC_ON 0x00000040 /* Other Neutrals             */
-/*
- * Implementation specific character properties.
- */
-#define UC_CM 0x00000080 /* Composite                  */
-#define UC_NB 0x00000100 /* Non-Breaking               */
-#define UC_SY 0x00000200 /* Symmetric                  */
-#define UC_HD 0x00000400 /* Hex Digit                  */
-#define UC_QM 0x00000800 /* Quote Mark                 */
-#define UC_MR 0x00001000 /* Mirroring                  */
-#define UC_SS 0x00002000 /* Space, other               */
-
-#define UC_CP 0x00004000 /* Defined                    */
-
-/*
- * Added for UnicodeData-2.1.3.
- */
-#define UC_PI 0x00008000 /* Punctuation, Initial       */
-#define UC_PF 0x00010000 /* Punctuation, Final         */
-
-/*
- * This is the primary function for testing to see if a character has some set
- * of properties.  The macros that test for various character properties all
- * call this function with some set of masks.
- */
-extern int ucisprop __((unsigned long code, unsigned long mask1,
-                        unsigned long mask2));
-
-#define ucisalpha(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT, 0)
-#define ucisdigit(cc) ucisprop(cc, UC_ND, 0)
-#define ucisalnum(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT|UC_ND, 0)
-#define uciscntrl(cc) ucisprop(cc, UC_CC|UC_CF, 0)
-#define ucisspace(cc) ucisprop(cc, UC_ZS|UC_SS, 0)
-#define ucisblank(cc) ucisprop(cc, UC_ZS, 0)
-#define ucispunct(cc) ucisprop(cc, UC_PD|UC_PS|UC_PE|UC_PO, UC_PI|UC_PF)
-#define ucisgraph(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
-                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
-                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SM|UC_SC|UC_SK|\
-                               UC_SO, UC_PI|UC_PF)
-#define ucisprint(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
-                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
-                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SM|UC_SC|UC_SK|\
-                               UC_SO|UC_ZS, UC_PI|UC_PF)
-#define ucisupper(cc) ucisprop(cc, UC_LU, 0)
-#define ucislower(cc) ucisprop(cc, UC_LL, 0)
-#define ucistitle(cc) ucisprop(cc, UC_LT, 0)
-#define ucisxdigit(cc) ucisprop(cc, 0, UC_HD)
-
-#define ucisisocntrl(cc) ucisprop(cc, UC_CC, 0)
-#define ucisfmtcntrl(cc) ucisprop(cc, UC_CF, 0)
-
-#define ucissymbol(cc) ucisprop(cc, UC_SM|UC_SC|UC_SO|UC_SK, 0)
-#define ucisnumber(cc) ucisprop(cc, UC_ND|UC_NO|UC_NL, 0)
-#define ucisnonspacing(cc) ucisprop(cc, UC_MN, 0)
-#define ucisopenpunct(cc) ucisprop(cc, UC_PS, 0)
-#define ucisclosepunct(cc) ucisprop(cc, UC_PE, 0)
-#define ucisinitialpunct(cc) ucisprop(cc, 0, UC_PI)
-#define ucisfinalpunct(cc) ucisprop(cc, 0, UC_PF)
-
-#define uciscomposite(cc) ucisprop(cc, 0, UC_CM)
-#define ucishex(cc) ucisprop(cc, 0, UC_HD)
-#define ucisquote(cc) ucisprop(cc, 0, UC_QM)
-#define ucissymmetric(cc) ucisprop(cc, 0, UC_SY)
-#define ucismirroring(cc) ucisprop(cc, 0, UC_MR)
-#define ucisnonbreaking(cc) ucisprop(cc, 0, UC_NB)
-
-/*
- * Directionality macros.
- */
-#define ucisrtl(cc) ucisprop(cc, UC_R, 0)
-#define ucisltr(cc) ucisprop(cc, UC_L, 0)
-#define ucisstrong(cc) ucisprop(cc, UC_L|UC_R, 0)
-#define ucisweak(cc) ucisprop(cc, UC_EN|UC_ES, UC_ET|UC_AN|UC_CS)
-#define ucisneutral(cc) ucisprop(cc, 0, UC_B|UC_S|UC_WS|UC_ON)
-#define ucisseparator(cc) ucisprop(cc, 0, UC_B|UC_S)
-
-/*
- * Other macros inspired by John Cowan.
- */
-#define ucismark(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME, 0)
-#define ucismodif(cc) ucisprop(cc, UC_LM, 0)
-#define ucisletnum(cc) ucisprop(cc, UC_NL, 0)
-#define ucisconnect(cc) ucisprop(cc, UC_PC, 0)
-#define ucisdash(cc) ucisprop(cc, UC_PD, 0)
-#define ucismath(cc) ucisprop(cc, UC_SM, 0)
-#define uciscurrency(cc) ucisprop(cc, UC_SC, 0)
-#define ucismodifsymbol(cc) ucisprop(cc, UC_SK, 0)
-#define ucisnsmark(cc) ucisprop(cc, UC_MN, 0)
-#define ucisspmark(cc) ucisprop(cc, UC_MC, 0)
-#define ucisenclosing(cc) ucisprop(cc, UC_ME, 0)
-#define ucisprivate(cc) ucisprop(cc, UC_CO, 0)
-#define ucissurrogate(cc) ucisprop(cc, UC_OS, 0)
-#define ucislsep(cc) ucisprop(cc, UC_ZL, 0)
-#define ucispsep(cc) ucisprop(cc, UC_ZP, 0)
-
-#define ucisidentstart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL, 0)
-#define ucisidentpart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL|\
-                                   UC_MN|UC_MC|UC_ND|UC_PC|UC_CF, 0)
-
-#define ucisdefined(cc) ucisprop(cc, 0, UC_CP)
-#define ucisundefined(cc) !ucisprop(cc, 0, UC_CP)
-
-/*
- * Other miscellaneous character property macros.
- */
-#define ucishan(cc) (((cc) >= 0x4e00 && (cc) <= 0x9fff) ||\
-                     ((cc) >= 0xf900 && (cc) <= 0xfaff))
-#define ucishangul(cc) ((cc) >= 0xac00 && (cc) <= 0xd7ff)
-
-/**************************************************************************
- *
- * Functions for case conversion.
- *
- **************************************************************************/
-
-extern unsigned long uctoupper __((unsigned long code));
-extern unsigned long uctolower __((unsigned long code));
-extern unsigned long uctotitle __((unsigned long code));
-
-/**************************************************************************
- *
- * Functions for getting decompositions.
- *
- **************************************************************************/
-
-/*
- * This routine determines if the code has a decomposition.  If it returns 0,
- * there is no decomposition.  Any other value indicates a decomposition was
- * returned.
- */
-extern int ucdecomp __((unsigned long code, unsigned long *num,
-
-                        unsigned long **decomp));
-
-/*
- * If the code is a Hangul syllable, this routine decomposes it into the array
- * passed.  The array size should be at least 3.
- */
-extern int ucdecomp_hangul __((unsigned long code, unsigned long *num,
-                               unsigned long decomp[]));
-
-/**************************************************************************
- *
- * Functions for getting combining classes.
- *
- **************************************************************************/
-
-/*
- * This will return the combining class for a character to be used with the
- * Canonical Ordering algorithm.
- */
-extern unsigned long uccombining_class __((unsigned long code));
-
-/**************************************************************************
- *
- * Functions for getting numbers and digits.
- *
- **************************************************************************/
-
-struct ucnumber {
-    int numerator;
-    int denominator;
-};
-
-extern int ucnumber_lookup __((unsigned long code, struct ucnumber *num));
-extern int ucdigit_lookup __((unsigned long code, int *digit));
-
-/*
- * For compatibility with John Cowan's "uctype" package.
- */
-extern struct ucnumber ucgetnumber __((unsigned long code));
-extern int ucgetdigit __((unsigned long code));
-
-/**************************************************************************
- *
- * Functions for library initialization and cleanup.
- *
- **************************************************************************/
-
-/*
- * Macros for specifying the data tables to be loaded for ucdata_load().
- */
-#define UCDATA_CASE   0x01
-#define UCDATA_CTYPE  0x02
-#define UCDATA_DECOMP 0x04
-#define UCDATA_CMBCL  0x08
-#define UCDATA_NUM    0x10
-
-#define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
-                    UCDATA_CMBCL|UCDATA_NUM)
-
-/*
- * Functions to load, unload, and reload specific data files.
- */
-extern void ucdata_load __((char *paths, int mask));
-extern void ucdata_unload __((int mask));
-extern void ucdata_reload __((char *paths, int mask));
-
-/*
- * Deprecated functions, now just compatibility macros.
- */
-#define ucdata_setup(p) ucdata_load(p, UCDATA_ALL)
-#define ucdata_cleanup() ucdata_unload(UCDATA_ALL)
-
-#undef __
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _h_ucdata */