diff --git a/intl/unicharutil/tools/MUTTUCData.txt b/intl/unicharutil/tools/MUTTUCData.txt deleted file mode 100644 index 7d44e290ac7a..000000000000 --- a/intl/unicharutil/tools/MUTTUCData.txt +++ /dev/null @@ -1,208 +0,0 @@ -# -# $Id: MUTTUCData.txt,v 1.1 1999/01/08 00:19:19 ftang%netscape.com Exp $ -# -# Copyright 1996, 1997, 1998 Computing Research Labs, -# New Mexico State University -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT -# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# -# Implementation specific character properties. -# -# -# Space, other. -# -0009;;Ss;;;;;;;;;;;; -000A;;Ss;;;;;;;;;;;; -000B;;Ss;;;;;;;;;;;; -000C;;Ss;;;;;;;;;;;; -000D;;Ss;;;;;;;;;;;; -# -# Non-breaking. -# -00A0;;Nb;;;;;;;;;;;; -2007;;Nb;;;;;;;;;;;; -2011;;Nb;;;;;;;;;;;; -FEFF;;Nb;;;;;;;;;;;; -# -# Symmetric. 
-# -0028;;Sy;;;;;;;;;;;; -0029;;Sy;;;;;;;;;;;; -005B;;Sy;;;;;;;;;;;; -005D;;Sy;;;;;;;;;;;; -007B;;Sy;;;;;;;;;;;; -007D;;Sy;;;;;;;;;;;; -00AB;;Sy;;;;;;;;;;;; -00BB;;Sy;;;;;;;;;;;; -0F3A;;Sy;;;;;;;;;;;; -0F3B;;Sy;;;;;;;;;;;; -0F3C;;Sy;;;;;;;;;;;; -0F3D;;Sy;;;;;;;;;;;; -0F3E;;Sy;;;;;;;;;;;; -0F3F;;Sy;;;;;;;;;;;; -2018;;Sy;;;;;;;;;;;; -2019;;Sy;;;;;;;;;;;; -201A;;Sy;;;;;;;;;;;; -201B;;Sy;;;;;;;;;;;; -201C;;Sy;;;;;;;;;;;; -201D;;Sy;;;;;;;;;;;; -201E;;Sy;;;;;;;;;;;; -201F;;Sy;;;;;;;;;;;; -2039;;Sy;;;;;;;;;;;; -203A;;Sy;;;;;;;;;;;; -2045;;Sy;;;;;;;;;;;; -2046;;Sy;;;;;;;;;;;; -207D;;Sy;;;;;;;;;;;; -207E;;Sy;;;;;;;;;;;; -208D;;Sy;;;;;;;;;;;; -208E;;Sy;;;;;;;;;;;; -2329;;Sy;;;;;;;;;;;; -232A;;Sy;;;;;;;;;;;; -3008;;Sy;;;;;;;;;;;; -3009;;Sy;;;;;;;;;;;; -300A;;Sy;;;;;;;;;;;; -300B;;Sy;;;;;;;;;;;; -300C;;Sy;;;;;;;;;;;; -300D;;Sy;;;;;;;;;;;; -300E;;Sy;;;;;;;;;;;; -300F;;Sy;;;;;;;;;;;; -3010;;Sy;;;;;;;;;;;; -3011;;Sy;;;;;;;;;;;; -3014;;Sy;;;;;;;;;;;; -3015;;Sy;;;;;;;;;;;; -3016;;Sy;;;;;;;;;;;; -3017;;Sy;;;;;;;;;;;; -3018;;Sy;;;;;;;;;;;; -3019;;Sy;;;;;;;;;;;; -301A;;Sy;;;;;;;;;;;; -301B;;Sy;;;;;;;;;;;; -301D;;Sy;;;;;;;;;;;; -301E;;Sy;;;;;;;;;;;; -FD3E;;Sy;;;;;;;;;;;; -FD3F;;Sy;;;;;;;;;;;; -FE35;;Sy;;;;;;;;;;;; -FE36;;Sy;;;;;;;;;;;; -FE37;;Sy;;;;;;;;;;;; -FE38;;Sy;;;;;;;;;;;; -FE39;;Sy;;;;;;;;;;;; -FE3A;;Sy;;;;;;;;;;;; -FE3B;;Sy;;;;;;;;;;;; -FE3C;;Sy;;;;;;;;;;;; -FE3D;;Sy;;;;;;;;;;;; -FE3E;;Sy;;;;;;;;;;;; -FE3F;;Sy;;;;;;;;;;;; -FE40;;Sy;;;;;;;;;;;; -FE41;;Sy;;;;;;;;;;;; -FE42;;Sy;;;;;;;;;;;; -FE43;;Sy;;;;;;;;;;;; -FE44;;Sy;;;;;;;;;;;; -FE59;;Sy;;;;;;;;;;;; -FE5A;;Sy;;;;;;;;;;;; -FE5B;;Sy;;;;;;;;;;;; -FE5C;;Sy;;;;;;;;;;;; -FE5D;;Sy;;;;;;;;;;;; -FE5E;;Sy;;;;;;;;;;;; -FF08;;Sy;;;;;;;;;;;; -FF09;;Sy;;;;;;;;;;;; -FF3B;;Sy;;;;;;;;;;;; -FF3D;;Sy;;;;;;;;;;;; -FF5B;;Sy;;;;;;;;;;;; -FF5D;;Sy;;;;;;;;;;;; -FF62;;Sy;;;;;;;;;;;; -FF63;;Sy;;;;;;;;;;;; -# -# Hex digit. 
-# -0030;;Hd;;;;;;;;;;;; -0031;;Hd;;;;;;;;;;;; -0032;;Hd;;;;;;;;;;;; -0033;;Hd;;;;;;;;;;;; -0034;;Hd;;;;;;;;;;;; -0035;;Hd;;;;;;;;;;;; -0036;;Hd;;;;;;;;;;;; -0037;;Hd;;;;;;;;;;;; -0038;;Hd;;;;;;;;;;;; -0039;;Hd;;;;;;;;;;;; -0041;;Hd;;;;;;;;;;;; -0042;;Hd;;;;;;;;;;;; -0043;;Hd;;;;;;;;;;;; -0044;;Hd;;;;;;;;;;;; -0045;;Hd;;;;;;;;;;;; -0046;;Hd;;;;;;;;;;;; -0061;;Hd;;;;;;;;;;;; -0062;;Hd;;;;;;;;;;;; -0063;;Hd;;;;;;;;;;;; -0064;;Hd;;;;;;;;;;;; -0065;;Hd;;;;;;;;;;;; -0066;;Hd;;;;;;;;;;;; -FF10;;Hd;;;;;;;;;;;; -FF11;;Hd;;;;;;;;;;;; -FF12;;Hd;;;;;;;;;;;; -FF13;;Hd;;;;;;;;;;;; -FF14;;Hd;;;;;;;;;;;; -FF15;;Hd;;;;;;;;;;;; -FF16;;Hd;;;;;;;;;;;; -FF17;;Hd;;;;;;;;;;;; -FF18;;Hd;;;;;;;;;;;; -FF19;;Hd;;;;;;;;;;;; -FF21;;Hd;;;;;;;;;;;; -FF22;;Hd;;;;;;;;;;;; -FF23;;Hd;;;;;;;;;;;; -FF24;;Hd;;;;;;;;;;;; -FF25;;Hd;;;;;;;;;;;; -FF26;;Hd;;;;;;;;;;;; -FF41;;Hd;;;;;;;;;;;; -FF42;;Hd;;;;;;;;;;;; -FF43;;Hd;;;;;;;;;;;; -FF44;;Hd;;;;;;;;;;;; -FF45;;Hd;;;;;;;;;;;; -FF46;;Hd;;;;;;;;;;;; -# -# Quote marks. -# -0022;;Qm;;;;;;;;;;;; -0027;;Qm;;;;;;;;;;;; -00AB;;Qm;;;;;;;;;;;; -00BB;;Qm;;;;;;;;;;;; -2018;;Qm;;;;;;;;;;;; -2019;;Qm;;;;;;;;;;;; -201A;;Qm;;;;;;;;;;;; -201B;;Qm;;;;;;;;;;;; -201C;;Qm;;;;;;;;;;;; -201D;;Qm;;;;;;;;;;;; -201E;;Qm;;;;;;;;;;;; -201F;;Qm;;;;;;;;;;;; -2039;;Qm;;;;;;;;;;;; -203A;;Qm;;;;;;;;;;;; -300C;;Qm;;;;;;;;;;;; -300D;;Qm;;;;;;;;;;;; -300E;;Qm;;;;;;;;;;;; -300F;;Qm;;;;;;;;;;;; -301D;;Qm;;;;;;;;;;;; -301E;;Qm;;;;;;;;;;;; -301F;;Qm;;;;;;;;;;;; -FE41;;Qm;;;;;;;;;;;; -FE42;;Qm;;;;;;;;;;;; -FE43;;Qm;;;;;;;;;;;; -FE44;;Qm;;;;;;;;;;;; -FF02;;Qm;;;;;;;;;;;; -FF07;;Qm;;;;;;;;;;;; -FF62;;Qm;;;;;;;;;;;; -FF63;;Qm;;;;;;;;;;;; diff --git a/intl/unicharutil/tools/UCDATAREADME.txt b/intl/unicharutil/tools/UCDATAREADME.txt deleted file mode 100644 index 012098fc0697..000000000000 --- a/intl/unicharutil/tools/UCDATAREADME.txt +++ /dev/null @@ -1,207 +0,0 @@ -# -# $Id: UCDATAREADME.txt,v 1.1 1999/01/08 00:19:20 ftang%netscape.com Exp $ -# - - MUTT UCData Package 1.9 - ----------------------- - 
-This is a package that supports ctype-like operations for Unicode UCS-2 text -(and surrogates), case mapping, and decomposition lookup. To use it, you will -need to get the "UnicodeData-2.0.14.txt" (or later) file from the Unicode Web -or FTP site. - -This package consists of two parts: - - 1. A program called "ucgendat" which generates five data files from the - UnicodeData-2.*.txt file. The files are: - - A. case.dat - the case mappings. - B. ctype.dat - the character property tables. - C. decomp.dat - the character decompositions. - D. cmbcl.dat - the non-zero combining classes. - E. num.dat - the codes representing numbers. - - 2. The "ucdata.[ch]" files which implement the functions needed to - check to see if a character matches groups of properties, to map between - upper, lower, and title case, to look up the decomposition of a - character, look up the combining class of a character, and get the number - value of a character. - -A short reference to the functions available is in the "api.txt" file. - -Techie Details -============== - -The "ucgendat" program parses files from the command line which are all in the -Unicode Character Database (UCDB) format. An additional properties file, -"MUTTUCData.txt", provides some extra properties for some characters. - -The program looks for the two character properties fields (2 and 4), the -combining class field (3), the decomposition field (5), the numeric value -field (8), and the case mapping fields (12, 13, and 14). The decompositions -are recursively expanded before being written out. - -The decomposition table contains all the canonical decompositions. This means -all decompositions that do not have tags such as "<compat>" or "<font>". - -The data is almost all stored as unsigned longs (32-bits assumed) and the -routines that load the data take care of endian swaps when necessary. This -also means that surrogates (>= 0x10000) can be placed in the data files the -"ucgendat" program parses. 
- -The data is written as external files and broken into five parts so it can be -selectively updated at runtime if necessary. - -The data files currently generated from the "ucgendat" program total about 56K -in size all together. - -The format of the binary data files is documented in the "format.txt" file. - -Mark Leisher -13 December 1998 - -CHANGES -======= - -Version 1.9 ------------ -1. Fixed a problem with an incorrect amount of storage being allocated for the - combining class nodes. - -2. Fixed an invalid initialization in the number code. - -3. Changed the Java template file formatting a bit. - -4. Added tables and function for getting decompositions in the Java class. - -Version 1.8 ------------ -1. Fixed a problem with adding certain ranges. - -2. Added two more macros for testing for identifiers. - -3. Tested with the UnicodeData-2.1.5.txt file. - -Version 1.7 ------------ -1. Fixed a problem with looking up decompositions in "ucgendat." - -Version 1.6 ------------ -1. Added two new properties introduced with UnicodeData-2.1.4.txt. - -2. Changed the "ucgendat.c" program a little to automatically align the - property data on a 4-byte boundary when new properties are added. - -3. Changed the "ucgendat.c" programs to only generate canonical - decompositions. - -4. Added two new macros ucisinitialpunct() and ucisfinalpunct() to check for - initial and final punctuation characters. - -5. Minor additions and changes to the documentation. - -Version 1.5 ------------ -1. Changed all file open calls to include binary mode with "b" for DOS/WIN - platforms. - -2. Wrapped the unistd.h include so it won't be included when compiled under - Win32. - -3. Fixed a bad range check for hex digits in ucgendat.c. - -4. Fixed a bad endian swap for combining classes. - -5. Added code to make a number table and associated lookup functions. - Functions added are ucnumber(), ucdigit(), and ucgetnumber(). 
The last - function is to maintain compatibility with John Cowan's "uctype" package. - -Version 1.4 ------------ -1. Fixed a bug with adding a range. - -2. Fixed a bug with inserting a range in order. - -3. Fixed incorrectly specified ucisdefined() and ucisundefined() macros. - -4. Added the missing unload for the combining class data. - -5. Fixed a bad macro placement in ucisweak(). - -Version 1.3 ------------ -1. Bug with case mapping calculations fixed. - -2. Bug with empty character property entries fixed. - -3. Bug with incorrect type in the combining class lookup fixed. - -4. Some corrections done to api.txt. - -5. Bug in certain character property lookups fixed. - -6. Added a character property table that records the defined characters. - -7. Replaced ucisunknown() with ucisdefined() and ucisundefined(). - -Version 1.2 ------------ -1. Added code to ucgendat to generate a combining class table. - -2. Fixed an endian problem with the byte count of decompositions. - -3. Fixed some minor problems in the "format.txt" file. - -4. Removed some bogus "Ss" values from MUTTUCData.txt file. - -5. Added API function to get combining class. - -6. Changed the open mode to "rb" so binary data files will be opened correctly - on DOS/WIN as well as other platforms. - -7. Added the "api.txt" file. - -Version 1.1 ------------ -1. Added ucisxdigit() which I overlooked. - -2. Added UC_LT to the ucisalpha() macro which I overlooked. - -3. Change uciscntrl() to include UC_CF. - -4. Added ucisocntrl() and ucfntcntrl() macros. - -5. Added a ucisblank() which I overlooked. - -6. Added missing properties to ucissymbol() and ucisnumber(). - -7. Added ucisgraph() and ucisprint(). - -8. Changed the "Mr" property to "Sy" to mark this subset of mirroring - characters as symmetric to avoid trampling the Unicode/ISO10646 sense of - mirroring. - -9. Added another property called "Ss" which includes control characters - traditionally seen as spaces in the isspace() macro. - -10. 
Added a bunch of macros to be API compatible with John Cowan's package. - -ACKNOWLEDGEMENTS -================ - -Thanks go to John Cowan for pointing out lots of -missing things and giving me stuff, particularly a bunch of new macros. - -Thanks go to Bob Verbrugge for pointing out -various bugs. - -Thanks go to Christophe Pierret for pointing -out that file modes need to have "b" for DOS/WIN machines, pointing out -unistd.h is not a Win 32 header, and pointing out a problem with ucisalnum(). - -Thanks go to Kent Johnson for finding a bug that caused -incomplete decompositions to be generated by the "ucgendat" program. - -Thanks go to Valeriy E. Ushakov for spotting an allocation -error and an initialization error. diff --git a/intl/unicharutil/tools/data/case.dat b/intl/unicharutil/tools/data/case.dat deleted file mode 100644 index 3bf7f2c932e3..000000000000 Binary files a/intl/unicharutil/tools/data/case.dat and /dev/null differ diff --git a/intl/unicharutil/tools/data/cmbcl.dat b/intl/unicharutil/tools/data/cmbcl.dat deleted file mode 100644 index 9a7513d8f8ce..000000000000 Binary files a/intl/unicharutil/tools/data/cmbcl.dat and /dev/null differ diff --git a/intl/unicharutil/tools/data/ctype.dat b/intl/unicharutil/tools/data/ctype.dat deleted file mode 100644 index 95e2c405b779..000000000000 Binary files a/intl/unicharutil/tools/data/ctype.dat and /dev/null differ diff --git a/intl/unicharutil/tools/data/decomp.dat b/intl/unicharutil/tools/data/decomp.dat deleted file mode 100644 index 29e3948e3f31..000000000000 Binary files a/intl/unicharutil/tools/data/decomp.dat and /dev/null differ diff --git a/intl/unicharutil/tools/data/num.dat b/intl/unicharutil/tools/data/num.dat deleted file mode 100644 index 52498ece0683..000000000000 Binary files a/intl/unicharutil/tools/data/num.dat and /dev/null differ diff --git a/intl/unicharutil/tools/format.txt b/intl/unicharutil/tools/format.txt deleted file mode 100644 index ef4210be1cb9..000000000000 --- 
a/intl/unicharutil/tools/format.txt +++ /dev/null @@ -1,243 +0,0 @@ -# -# $Id: format.txt,v 1.1 1999/01/08 00:19:20 ftang%netscape.com Exp $ -# - -CHARACTER DATA -============== - -This package generates some data files that contain character properties useful -for text processing. - -CHARACTER PROPERTIES -==================== - -The first data file is called "ctype.dat" and contains a compressed form of -the character properties found in the Unicode Character Database (UCDB). -Additional properties can be specified in limited UCDB format in another file -to avoid modifying the original UCDB. - -The following is a property name and code table to be used with the character -data: - -NAME CODE DESCRIPTION ---------------------- -Mn 0 Mark, Non-Spacing -Mc 1 Mark, Spacing Combining -Me 2 Mark, Enclosing -Nd 3 Number, Decimal Digit -Nl 4 Number, Letter -No 5 Number, Other -Zs 6 Separator, Space -Zl 7 Separator, Line -Zp 8 Separator, Paragraph -Cc 9 Other, Control -Cf 10 Other, Format -Cs 11 Other, Surrogate -Co 12 Other, Private Use -Cn 13 Other, Not Assigned -Lu 14 Letter, Uppercase -Ll 15 Letter, Lowercase -Lt 16 Letter, Titlecase -Lm 17 Letter, Modifier -Lo 18 Letter, Other -Pc 19 Punctuation, Connector -Pd 20 Punctuation, Dash -Ps 21 Punctuation, Open -Pe 22 Punctuation, Close -Po 23 Punctuation, Other -Sm 24 Symbol, Math -Sc 25 Symbol, Currency -Sk 26 Symbol, Modifier -So 27 Symbol, Other -L 28 Left-To-Right -R 29 Right-To-Left -EN 30 European Number -ES 31 European Number Separator -ET 32 European Number Terminator -AN 33 Arabic Number -CS 34 Common Number Separator -B 35 Block Separator -S 36 Segment Separator -WS 37 Whitespace -ON 38 Other Neutrals -Pi 47 Punctuation, Initial -Pf 48 Punctuation, Final -# -# Implementation specific properties. 
-# -Cm 39 Composite -Nb 40 Non-Breaking -Sy 41 Symmetric (characters which are part of open/close pairs) -Hd 42 Hex Digit -Qm 43 Quote Mark -Mr 44 Mirroring -Ss 45 Space, Other (controls viewed as spaces in ctype isspace()) -Cp 46 Defined character - -The actual binary data is formatted as follows: - - Assumptions: unsigned short is at least 16-bits in size and unsigned long - is at least 32-bits in size. - - unsigned short ByteOrderMark - unsigned short OffsetArraySize - unsigned long Bytes - unsigned short Offsets[OffsetArraySize + 1] - unsigned long Ranges[N], N = value of Offsets[OffsetArraySize] - - The Bytes field provides the total byte count used for the Offsets[] and - Ranges[] arrays. The Offsets[] array is aligned on a 4-byte boundary and - there is always one extra node on the end to hold the final index of the - Ranges[] array. The Ranges[] array contains pairs of 4-byte values - representing a range of Unicode characters. The pairs are arranged in - increasing order by the first character code in the range. - - Determining if a particular character is in the property list requires a - simple binary search to determine if a character is in any of the ranges - for the property. - - If the ByteOrderMark is equal to 0xFFFE, then the data was generated on a - machine with a different endian order and the values must be byte-swapped. - - To swap a 16-bit value: - c = (c >> 8) | ((c & 0xff) << 8) - - To swap a 32-bit value: - c = ((c & 0xff) << 24) | (((c >> 8) & 0xff) << 16) | - (((c >> 16) & 0xff) << 8) | (c >> 24) - -CASE MAPPINGS -============= - -The next data file is called "case.dat" and contains three case mapping tables -in the following order: upper, lower, and title case. Each table is in -increasing order by character code and each mapping contains 3 unsigned longs -which represent the possible mappings. 
- -The format for the binary form of these tables is: - - unsigned short ByteOrderMark - unsigned short NumMappingNodes, count of all mapping nodes - unsigned short CaseTableSizes[2], upper and lower mapping node counts - unsigned long CaseTables[NumMappingNodes] - - The starting indexes of the case tables are calculated as following: - - UpperIndex = 0; - LowerIndex = CaseTableSizes[0] * 3; - TitleIndex = LowerIndex + CaseTableSizes[1] * 3; - - The order of the fields for the three tables are: - - Upper case - ---------- - unsigned long upper; - unsigned long lower; - unsigned long title; - - Lower case - ---------- - unsigned long lower; - unsigned long upper; - unsigned long title; - - Title case - ---------- - unsigned long title; - unsigned long upper; - unsigned long lower; - - If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the - same way as described in the CHARACTER PROPERTIES section. - - Because the tables are in increasing order by character code, locating a - mapping requires a simple binary search on one of the 3 codes that make up - each node. - - It is important to note that there can only be 65536 mapping nodes which - divided into 3 portions allows 21845 nodes for each case mapping table. The - distribution of mappings may be more or less than 21845 per table, but only - 65536 are allowed. - -DECOMPOSITIONS -============== - -The next data file is called "decomp.dat" and contains the decomposition data -for all characters with decompositions containing more than one character and -are *not* compatibility decompositions. Compatibility decompositions are -signaled in the UCDB format by the use of the <compat> tag in the -decomposition field. Each list of character codes represents a full -decomposition of a composite character. The nodes are arranged in increasing -order by character code. 
- -The format for the binary form of this table is: - - unsigned short ByteOrderMark - unsigned short NumDecompNodes, count of all decomposition nodes - unsigned long Bytes - unsigned long DecompNodes[(NumDecompNodes * 2) + 1] - unsigned long Decomp[N], N = sum of all counts in DecompNodes[] - - If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the - same way as described in the CHARACTER PROPERTIES section. - - The DecompNodes[] array consists of pairs of unsigned longs, the first of - which is the character code and the second is the initial index of the list - of character codes representing the decomposition. - - Locating the decomposition of a composite character requires a binary search - for a character code in the DecompNodes[] array and using its index to - locate the start of the decomposition. The length of the decomposition list - is the index in the following element in DecompNode[] minus the current - index. - -COMBINING CLASSES -================= - -The fourth data file is called "cmbcl.dat" and contains the characters with -non-zero combining classes. - -The format for the binary form of this table is: - - unsigned short ByteOrderMark - unsigned short NumCCLNodes - unsigned long Bytes - unsigned long CCLNodes[NumCCLNodes * 3] - - If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the - same way as described in the CHARACTER PROPERTIES section. - - The CCLNodes[] array consists of groups of three unsigned longs. The first - and second are the beginning and ending of a range and the third is the - combining class of that range. - - If a character is not found in this table, then the combining class is - assumed to be 0. - - It is important to note that only 65536 distinct ranges plus combining class - can be specified because the NumCCLNodes is usually a 16-bit number. 
- -NUMBER TABLE -============ - -The final data file is called "num.dat" and contains the characters that have -a numeric value associated with them. - -The format for the binary form of the table is: - - unsigned short ByteOrderMark - unsigned short NumNumberNodes - unsigned long Bytes - unsigned long NumberNodes[NumNumberNodes] - unsigned short ValueNodes[(Bytes - (NumNumberNodes * sizeof(unsigned long))) - / sizeof(short)] - - If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the - same way as described in the CHARACTER PROPERTIES section. - - The NumberNodes array contains pairs of values, the first of which is the - character code and the second an index into the ValueNodes array. The - ValueNodes array contains pairs of integers which represent the numerator - and denominator of the numeric value of the character. If the character - happens to map to an integer, both the values in ValueNodes will be the - same. diff --git a/intl/unicharutil/tools/moz.build b/intl/unicharutil/tools/moz.build deleted file mode 100644 index d2072d2aacbd..000000000000 --- a/intl/unicharutil/tools/moz.build +++ /dev/null @@ -1,12 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -Program('ucgendat') - -SOURCES += [ - 'ucgendat.c', -] - diff --git a/intl/unicharutil/tools/ucgendat.c b/intl/unicharutil/tools/ucgendat.c deleted file mode 100644 index 400359ad4c6b..000000000000 --- a/intl/unicharutil/tools/ucgendat.c +++ /dev/null @@ -1,1457 +0,0 @@ -/* - * Copyright 1996, 1997, 1998 Computing Research Labs, - * New Mexico State University - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT - * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#ifndef lint -#ifdef __GNUC__ -static char rcsid[] __attribute__ ((unused)) = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $"; -#else -static char rcsid[] = "$Id: ucgendat.c,v 1.1 1999/01/08 00:19:21 ftang%netscape.com Exp $"; -#endif -#endif - -#include -#include -#include -#ifndef WIN32 -#include -#endif - -#define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\ - ((cc) >= 'A' && (cc) <= 'F') ||\ - ((cc) >= 'a' && (cc) <= 'f')) - -/* - * A header written to the output file with the byte-order-mark and the number - * of property nodes. - */ -static unsigned short hdr[2] = {0xfeff, 0}; - -#define NUMPROPS 49 -#define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3))) - -typedef struct { - char *name; - int len; -} _prop_t; - -/* - * List of properties expected to be found in the Unicode Character Database - * including some implementation specific properties. - * - * The implementation specific properties are: - * Cm = Composed (can be decomposed) - * Nb = Non-breaking - * Sy = Symmetric (has left and right forms) - * Hd = Hex digit - * Qm = Quote marks - * Mr = Mirroring - * Ss = Space, other - * Cp = Defined character - */ -static _prop_t props[NUMPROPS] = { - {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2}, - {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2}, - {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2}, - {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2}, - {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L", 1}, {"R", 1}, - {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B", 1}, - {"S", 1}, {"WS", 2}, {"ON", 2}, - {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2}, - {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2} -}; - -typedef struct { - unsigned long *ranges; - unsigned short used; - unsigned short size; -} _ranges_t; - -static _ranges_t proptbl[NUMPROPS]; - -/* - * Make sure this array is sized to be on a 4-byte boundary at compile time. 
- */ -static unsigned short propcnt[NEEDPROPS]; - -/* - * Array used to collect a decomposition before adding it to the decomposition - * table. - */ -static unsigned long dectmp[64]; -static unsigned long dectmp_size; - -typedef struct { - unsigned long code; - unsigned short size; - unsigned short used; - unsigned long *decomp; -} _decomp_t; - -/* - * List of decomposition. Created and expanded in order as the characters are - * encountered. - */ -static _decomp_t *decomps; -static unsigned long decomps_used; -static unsigned long decomps_size; - -/* - * Types and lists for handling lists of case mappings. - */ -typedef struct { - unsigned long key; - unsigned long other1; - unsigned long other2; -} _case_t; - -static _case_t *upper; -static _case_t *lower; -static _case_t *title; -static unsigned long upper_used; -static unsigned long upper_size; -static unsigned long lower_used; -static unsigned long lower_size; -static unsigned long title_used; -static unsigned long title_size; - -/* - * Array used to collect case mappings before adding them to a list. - */ -static unsigned long cases[3]; - -/* - * An array to hold ranges for combining classes. - */ -static unsigned long *ccl; -static unsigned long ccl_used; -static unsigned long ccl_size; - -/* - * Structures for handling numbers. - */ -typedef struct { - unsigned long code; - unsigned long idx; -} _codeidx_t; - -typedef struct { - short numerator; - short denominator; -} _num_t; - -/* - * Arrays to hold the mapping of codes to numbers. - */ -static _codeidx_t *ncodes; -static unsigned long ncodes_used; -static unsigned long ncodes_size; - -static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; - -/* - * Array for holding numbers. 
- */ -static _num_t *nums; -static unsigned long nums_used; -static unsigned long nums_size; - -static void -#ifdef __STDC__ -add_range(unsigned long start, unsigned long end, char *p1, char *p2) -#else -add_range(start, end, p1, p2) -unsigned long start, end; -char *p1, *p2; -#endif -{ - int i, j, k, len; - _ranges_t *rlp; - char *name; - - for (k = 0; k < 2; k++) { - if (k == 0) { - name = p1; - len = 2; - } else { - if (p2 == 0) - break; - - name = p2; - len = 1; - } - - for (i = 0; i < NUMPROPS; i++) { - if (props[i].len == len && memcmp(props[i].name, name, len) == 0) - break; - } - - if (i == NUMPROPS) - continue; - - rlp = &proptbl[i]; - - /* - * Resize the range list if necessary. - */ - if (rlp->used == rlp->size) { - if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); - else - rlp->ranges = (unsigned long *) - realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); - rlp->size += 8; - } - - /* - * If this is the first code for this property list, just add it - * and return. - */ - if (rlp->used == 0) { - rlp->ranges[0] = start; - rlp->ranges[1] = end; - rlp->used += 2; - continue; - } - - /* - * Optimize the case of adding the range to the end. - */ - j = rlp->used - 1; - if (start > rlp->ranges[j]) { - j = rlp->used; - rlp->ranges[j++] = start; - rlp->ranges[j++] = end; - rlp->used = j; - continue; - } - - /* - * Need to locate the insertion point. - */ - for (i = 0; - i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ; - - /* - * If the start value lies in the current range, then simply set the - * new end point of the range to the end value passed as a parameter. - */ - if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) { - rlp->ranges[i + 1] = end; - return; - } - - /* - * Shift following values up by two. 
- */ - for (j = rlp->used; j > i; j -= 2) { - rlp->ranges[j] = rlp->ranges[j - 2]; - rlp->ranges[j + 1] = rlp->ranges[j - 1]; - } - - /* - * Add the new range at the insertion point. - */ - rlp->ranges[i] = start; - rlp->ranges[i + 1] = end; - rlp->used += 2; - } -} - -static void -#ifdef __STDC__ -ordered_range_insert(unsigned long c, char *name, int len) -#else -ordered_range_insert(c, name, len) -unsigned long c; -char *name; -int len; -#endif -{ - int i, j; - unsigned long s, e; - _ranges_t *rlp; - - if (len == 0) - return; - - for (i = 0; i < NUMPROPS; i++) { - if (props[i].len == len && memcmp(props[i].name, name, len) == 0) - break; - } - - if (i == NUMPROPS) - return; - - /* - * Have a match, so insert the code in order. - */ - rlp = &proptbl[i]; - - /* - * Resize the range list if necessary. - */ - if (rlp->used == rlp->size) { - if (rlp->size == 0) - rlp->ranges = (unsigned long *) - malloc(sizeof(unsigned long) << 3); - else - rlp->ranges = (unsigned long *) - realloc((char *) rlp->ranges, - sizeof(unsigned long) * (rlp->size + 8)); - rlp->size += 8; - } - - /* - * If this is the first code for this property list, just add it - * and return. - */ - if (rlp->used == 0) { - rlp->ranges[0] = rlp->ranges[1] = c; - rlp->used += 2; - return; - } - - /* - * Optimize the cases of extending the last range and adding new ranges to - * the end. - */ - j = rlp->used - 1; - e = rlp->ranges[j]; - s = rlp->ranges[j - 1]; - - if (c == e + 1) { - /* - * Extend the last range. - */ - rlp->ranges[j] = c; - return; - } - - if (c > e + 1) { - /* - * Start another range on the end. - */ - j = rlp->used; - rlp->ranges[j] = rlp->ranges[j + 1] = c; - rlp->used += 2; - return; - } - - if (c >= s) - /* - * The code is a duplicate of a code in the last range, so just return. - */ - return; - - /* - * The code should be inserted somewhere before the last range in the - * list. Locate the insertion point. 
- */ - for (i = 0; - i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ; - - s = rlp->ranges[i]; - e = rlp->ranges[i + 1]; - - if (c == e + 1) - /* - * Simply extend the current range. - */ - rlp->ranges[i + 1] = c; - else if (c < s) { - /* - * Add a new entry before the current location. Shift all entries - * before the current one up by one to make room. - */ - for (j = rlp->used; j > i; j -= 2) { - rlp->ranges[j] = rlp->ranges[j - 2]; - rlp->ranges[j + 1] = rlp->ranges[j - 1]; - } - rlp->ranges[i] = rlp->ranges[i + 1] = c; - - rlp->used += 2; - } -} - -static void -#ifdef __STDC__ -add_decomp(unsigned long code) -#else -add_decomp(code) -unsigned long code; -#endif -{ - unsigned long i, j, size; - - /* - * Add the code to the composite property. - */ - ordered_range_insert(code, "Cm", 2); - - /* - * Locate the insertion point for the code. - */ - for (i = 0; i < decomps_used && code > decomps[i].code; i++) ; - - /* - * Allocate space for a new decomposition. - */ - if (decomps_used == decomps_size) { - if (decomps_size == 0) - decomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3); - else - decomps = (_decomp_t *) - realloc((char *) decomps, - sizeof(_decomp_t) * (decomps_size + 8)); - (void) memset((char *) (decomps + decomps_size), 0, - sizeof(_decomp_t) << 3); - decomps_size += 8; - } - - if (i < decomps_used && code != decomps[i].code) { - /* - * Shift the decomps up by one if the codes don't match. - */ - for (j = decomps_used; j > i; j--) - (void) memcpy((char *) &decomps[j], (char *) &decomps[j - 1], - sizeof(_decomp_t)); - } - - /* - * Insert or replace a decomposition. 
- */ - size = dectmp_size + (4 - (dectmp_size & 3)); - if (decomps[i].size < size) { - if (decomps[i].size == 0) - decomps[i].decomp = (unsigned long *) - malloc(sizeof(unsigned long) * size); - else - decomps[i].decomp = (unsigned long *) - realloc((char *) decomps[i].decomp, - sizeof(unsigned long) * size); - decomps[i].size = size; - } - - if (decomps[i].code != code) - decomps_used++; - - decomps[i].code = code; - decomps[i].used = dectmp_size; - (void) memcpy((char *) decomps[i].decomp, (char *) dectmp, - sizeof(unsigned long) * dectmp_size); - -} - -static void -#ifdef __STDC__ -add_title(unsigned long code) -#else -add_title(code) -unsigned long code; -#endif -{ - unsigned long i, j; - - /* - * Always map the code to itself. - */ - cases[2] = code; - - if (title_used == title_size) { - if (title_size == 0) - title = (_case_t *) malloc(sizeof(_case_t) << 3); - else - title = (_case_t *) realloc((char *) title, - sizeof(_case_t) * (title_size + 8)); - title_size += 8; - } - - /* - * Locate the insertion point. - */ - for (i = 0; i < title_used && code > title[i].key; i++) ; - - if (i < title_used) { - /* - * Shift the array up by one. - */ - for (j = title_used; j > i; j--) - (void) memcpy((char *) &title[j], (char *) &title[j - 1], - sizeof(_case_t)); - } - - title[i].key = cases[2]; /* Title */ - title[i].other1 = cases[0]; /* Upper */ - title[i].other2 = cases[1]; /* Lower */ - - title_used++; -} - -static void -#ifdef __STDC__ -add_upper(unsigned long code) -#else -add_upper(code) -unsigned long code; -#endif -{ - unsigned long i, j; - - /* - * Always map the code to itself. - */ - cases[0] = code; - - /* - * If the title case character is not present, then make it the same as - * the upper case. 
- */ - if (cases[2] == 0) - cases[2] = code; - - if (upper_used == upper_size) { - if (upper_size == 0) - upper = (_case_t *) malloc(sizeof(_case_t) << 3); - else - upper = (_case_t *) realloc((char *) upper, - sizeof(_case_t) * (upper_size + 8)); - upper_size += 8; - } - - /* - * Locate the insertion point. - */ - for (i = 0; i < upper_used && code > upper[i].key; i++) ; - - if (i < upper_used) { - /* - * Shift the array up by one. - */ - for (j = upper_used; j > i; j--) - (void) memcpy((char *) &upper[j], (char *) &upper[j - 1], - sizeof(_case_t)); - } - - upper[i].key = cases[0]; /* Upper */ - upper[i].other1 = cases[1]; /* Lower */ - upper[i].other2 = cases[2]; /* Title */ - - upper_used++; -} - -static void -#ifdef __STDC__ -add_lower(unsigned long code) -#else -add_lower(code) -unsigned long code; -#endif -{ - unsigned long i, j; - - /* - * Always map the code to itself. - */ - cases[1] = code; - - /* - * If the title case character is empty, then make it the same as the - * upper case. - */ - if (cases[2] == 0) - cases[2] = cases[0]; - - if (lower_used == lower_size) { - if (lower_size == 0) - lower = (_case_t *) malloc(sizeof(_case_t) << 3); - else - lower = (_case_t *) realloc((char *) lower, - sizeof(_case_t) * (lower_size + 8)); - lower_size += 8; - } - - /* - * Locate the insertion point. - */ - for (i = 0; i < lower_used && code > lower[i].key; i++) ; - - if (i < lower_used) { - /* - * Shift the array up by one. 
- */ - for (j = lower_used; j > i; j--) - (void) memcpy((char *) &lower[j], (char *) &lower[j - 1], - sizeof(_case_t)); - } - - lower[i].key = cases[1]; /* Lower */ - lower[i].other1 = cases[0]; /* Upper */ - lower[i].other2 = cases[2]; /* Title */ - - lower_used++; -} - -static void -#ifdef __STDC__ -ordered_ccl_insert(unsigned long c, unsigned long ccl_code) -#else -ordered_ccl_insert(c, ccl_code) -unsigned long c, ccl_code; -#endif -{ - unsigned long i, j; - - if (ccl_used == ccl_size) { - if (ccl_size == 0) - ccl = (unsigned long *) malloc(sizeof(unsigned long) * 24); - else - ccl = (unsigned long *) - realloc((char *) ccl, sizeof(unsigned long) * (ccl_size + 24)); - ccl_size += 24; - } - - /* - * Optimize adding the first item. - */ - if (ccl_used == 0) { - ccl[0] = ccl[1] = c; - ccl[2] = ccl_code; - ccl_used += 3; - return; - } - - /* - * Handle the special case of extending the range on the end. This - * requires that the combining class codes are the same. - */ - if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) { - ccl[ccl_used - 2] = c; - return; - } - - /* - * Handle the special case of adding another range on the end. - */ - if (c > ccl[ccl_used - 2] + 1 || - (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) { - ccl[ccl_used++] = c; - ccl[ccl_used++] = c; - ccl[ccl_used++] = ccl_code; - return; - } - - /* - * Locate either the insertion point or range for the code. - */ - for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ; - - if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) { - /* - * Extend an existing range. - */ - ccl[i + 1] = c; - return; - } else if (c < ccl[i]) { - /* - * Start a new range before the current location. - */ - for (j = ccl_used; j > i; j -= 3) { - ccl[j] = ccl[j - 3]; - ccl[j - 1] = ccl[j - 4]; - ccl[j - 2] = ccl[j - 5]; - } - ccl[i] = ccl[i + 1] = c; - ccl[i + 2] = ccl_code; - } -} - -/* - * Adds a number if it does not already exist and returns an index value - * multiplied by 2. 
- */ -static unsigned long -#ifdef __STDC__ -make_number(short num, short denom) -#else -make_number(num, denom) -short num, denom; -#endif -{ - unsigned long n; - - /* - * Determine if the number already exists. - */ - for (n = 0; n < nums_used; n++) { - if (nums[n].numerator == num && nums[n].denominator == denom) - return n << 1; - } - - if (nums_used == nums_size) { - if (nums_size == 0) - nums = (_num_t *) malloc(sizeof(_num_t) << 3); - else - nums = (_num_t *) realloc((char *) nums, - sizeof(_num_t) * (nums_size + 8)); - nums_size += 8; - } - - n = nums_used++; - nums[n].numerator = num; - nums[n].denominator = denom; - - return n << 1; -} - -static void -#ifdef __STDC__ -add_number(unsigned long code, short num, short denom) -#else -add_number(code, num, denom) -unsigned long code; -short num, denom; -#endif -{ - unsigned long i, j; - - /* - * Insert the code in order. - */ - for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ; - - /* - * Handle the case of the codes matching and simply replace the number - * that was there before. - */ - if (ncodes_used > 0 && code == ncodes[i].code) { - ncodes[i].idx = make_number(num, denom); - return; - } - - /* - * Resize the array if necessary. - */ - if (ncodes_used == ncodes_size) { - if (ncodes_size == 0) - ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3); - else - ncodes = (_codeidx_t *) - realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8)); - - ncodes_size += 8; - } - - /* - * Shift things around to insert the code if necessary. - */ - if (i < ncodes_used) { - for (j = ncodes_used; j > i; j--) { - ncodes[j].code = ncodes[j - 1].code; - ncodes[j].idx = ncodes[j - 1].idx; - } - } - ncodes[i].code = code; - ncodes[i].idx = make_number(num, denom); - - ncodes_used++; -} - -/* - * This routine assumes that the line is a valid Unicode Character Database - * entry. 
- */ -static void -#ifdef __STDC__ -read_cdata(FILE *in) -#else -read_cdata(in) -FILE *in; -#endif -{ - unsigned long i, lineno, skip, code, ccl_code; - short wnum, neg, number[2]; - char line[512], *s, *e; - - lineno = skip = 0; - while (fscanf(in, "%[^\n]\n", line) != EOF) { - lineno++; - - /* - * Skip blank lines and lines that start with a '#'. - */ - if (line[0] == 0 || line[0] == '#') - continue; - - /* - * If lines need to be skipped, do it here. - */ - if (skip) { - skip--; - continue; - } - - /* - * Collect the code. The code can be up to 6 hex digits in length to - * allow surrogates to be specified. - */ - for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) { - code <<= 4; - if (*s >= '0' && *s <= '9') - code += *s - '0'; - else if (*s >= 'A' && *s <= 'F') - code += (*s - 'A') + 10; - else if (*s >= 'a' && *s <= 'f') - code += (*s - 'a') + 10; - } - - /* - * Handle the following special cases: - * 1. 4E00-9FA5 CJK Ideographs. - * 2. AC00-D7A3 Hangul Syllables. - * 3. D800-DFFF Surrogates. - * 4. E000-F8FF Private Use Area. - * 5. F900-FA2D Han compatibility. - */ - switch (code) { - case 0x4e00: - /* - * The Han ideographs. - */ - add_range(0x4e00, 0x9fff, "Lo", "L"); - - /* - * Add the characters to the defined category. - */ - add_range(0x4e00, 0x9fa5, "Cp", 0); - - skip = 1; - break; - case 0xac00: - /* - * The Hangul syllables. - */ - add_range(0xac00, 0xd7a3, "Lo", "L"); - - /* - * Add the characters to the defined category. - */ - add_range(0xac00, 0xd7a3, "Cp", 0); - - skip = 1; - break; - case 0xd800: - /* - * Make a range of all surrogates and assume some default - * properties. - */ - add_range(0x010000, 0x10ffff, "Cs", "L"); - skip = 5; - break; - case 0xe000: - /* - * The Private Use area. Add with a default set of properties. - */ - add_range(0xe000, 0xf8ff, "Co", "L"); - skip = 1; - break; - case 0xf900: - /* - * The CJK compatibility area. 
- */ - add_range(0xf900, 0xfaff, "Lo", "L"); - - /* - * Add the characters to the defined category. - */ - add_range(0xf900, 0xfaff, "Cp", 0); - - skip = 1; - } - - if (skip) - continue; - - /* - * Add the code to the defined category. - */ - ordered_range_insert(code, "Cp", 2); - - /* - * Locate the first character property field. - */ - for (i = 0; *s != 0 && i < 2; s++) { - if (*s == ';') - i++; - } - for (e = s; *e && *e != ';'; e++) ; - - ordered_range_insert(code, s, e - s); - - /* - * Locate the combining class code. - */ - for (s = e; *s != 0 && i < 3; s++) { - if (*s == ';') - i++; - } - - /* - * Convert the combining class code from decimal. - */ - for (ccl_code = 0, e = s; *e && *e != ';'; e++) - ccl_code = (ccl_code * 10) + (*e - '0'); - - /* - * Add the code if it not 0. - */ - if (ccl_code != 0) - ordered_ccl_insert(code, ccl_code); - - /* - * Locate the second character property field. - */ - for (s = e; *s != 0 && i < 4; s++) { - if (*s == ';') - i++; - } - for (e = s; *e && *e != ';'; e++) ; - - ordered_range_insert(code, s, e - s); - - /* - * Check for a decomposition. - */ - s = ++e; - if (*s != ';' && *s != '<') { - /* - * Collect the codes of the decomposition. - */ - for (dectmp_size = 0; *s != ';'; ) { - /* - * Skip all leading non-hex digits. - */ - while (!ishdigit(*s)) - s++; - - for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) { - dectmp[dectmp_size] <<= 4; - if (*s >= '0' && *s <= '9') - dectmp[dectmp_size] += *s - '0'; - else if (*s >= 'A' && *s <= 'F') - dectmp[dectmp_size] += (*s - 'A') + 10; - else if (*s >= 'a' && *s <= 'f') - dectmp[dectmp_size] += (*s - 'a') + 10; - } - dectmp_size++; - } - - /* - * If there is more than one code in the temporary decomposition - * array, then add the character with its decomposition. - */ - if (dectmp_size > 1) - add_decomp(code); - } - - /* - * Skip to the number field. - */ - for (i = 0; i < 3 && *s; s++) { - if (*s == ';') - i++; - } - - /* - * Scan the number in. 
- */ - number[0] = number[1] = 0; - for (e = s, neg = wnum = 0; *e && *e != ';'; e++) { - if (*e == '-') { - neg = 1; - continue; - } - - if (*e == '/') { - /* - * Move the the denominator of the fraction. - */ - if (neg) - number[wnum] *= -1; - neg = 0; - e++; - wnum++; - } - number[wnum] = (number[wnum] * 10) + (*e - '0'); - } - - if (e > s) { - /* - * Adjust the denominator in case of integers and add the number. - */ - if (wnum == 0) - number[1] = number[0]; - - add_number(code, number[0], number[1]); - } - - /* - * Skip to the start of the possible case mappings. - */ - for (s = e, i = 0; i < 4 && *s; s++) { - if (*s == ';') - i++; - } - - /* - * Collect the case mappings. - */ - cases[0] = cases[1] = cases[2] = 0; - for (i = 0; i < 3; i++) { - while (ishdigit(*s)) { - cases[i] <<= 4; - if (*s >= '0' && *s <= '9') - cases[i] += *s - '0'; - else if (*s >= 'A' && *s <= 'F') - cases[i] += (*s - 'A') + 10; - else if (*s >= 'a' && *s <= 'f') - cases[i] += (*s - 'a') + 10; - s++; - } - if (*s == ';') - s++; - } - if (cases[0] && cases[1]) - /* - * Add the upper and lower mappings for a title case character. - */ - add_title(code); - else if (cases[1]) - /* - * Add the lower and title case mappings for the upper case - * character. - */ - add_upper(code); - else if (cases[0]) - /* - * Add the upper and title case mappings for the lower case - * character. 
- */ - add_lower(code); - } -} - -static _decomp_t * -#ifdef __STDC__ -find_decomp(unsigned long code) -#else -find_decomp(code) -unsigned long code; -#endif -{ - long l, r, m; - - l = 0; - r = decomps_used - 1; - while (l <= r) { - m = (l + r) >> 1; - if (code > decomps[m].code) - l = m + 1; - else if (code < decomps[m].code) - r = m - 1; - else - return &decomps[m]; - } - return 0; -} - -static void -#ifdef __STDC__ -decomp_it(_decomp_t *d) -#else -decomp_it(d) -_decomp_t *d; -#endif -{ - unsigned long i; - _decomp_t *dp; - - for (i = 0; i < d->used; i++) { - if ((dp = find_decomp(d->decomp[i])) != 0) - decomp_it(dp); - else - dectmp[dectmp_size++] = d->decomp[i]; - } -} - -/* - * Expand all decompositions by recursively decomposing each character - * in the decomposition. - */ -static void -#ifdef __STDC__ -expand_decomp(void) -#else -expand_decomp() -#endif -{ - unsigned long i; - - for (i = 0; i < decomps_used; i++) { - dectmp_size = 0; - decomp_it(&decomps[i]); - if (dectmp_size > 0) - add_decomp(decomps[i].code); - } -} - -static void -#ifdef __STDC__ -write_cdata(char *opath) -#else -write_cdata(opath) -char *opath; -#endif -{ - FILE *out; - unsigned long i, idx, bytes, nprops; - unsigned short casecnt[2]; - char path[BUFSIZ]; - - /***************************************************************** - * - * Generate the ctype data. - * - *****************************************************************/ - - /* - * Open the ctype.dat file. - */ - sprintf(path, "%s/ctype.dat", opath); - if ((out = fopen(path, "wb")) == 0) - return; - - /* - * Collect the offsets for the properties. The offsets array is - * on a 4-byte boundary to keep things efficient for architectures - * that need such a thing. - */ - for (i = idx = 0; i < NUMPROPS; i++) { - propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff; - idx += proptbl[i].used; - } - - /* - * Add the sentinel index which is used by the binary search as the upper - * bound for a search. 
- */ - propcnt[i] = idx; - - /* - * Record the actual number of property lists. This may be different than - * the number of offsets actually written because of aligning on a 4-byte - * boundary. - */ - hdr[1] = NUMPROPS; - - /* - * Calculate the byte count needed and pad the property counts array to a - * 4-byte boundary. - */ - if ((bytes = sizeof(unsigned short) * (NUMPROPS + 1)) & 3) - bytes += 4 - (bytes & 3); - nprops = bytes / sizeof(unsigned short); - bytes += sizeof(unsigned long) * idx; - - /* - * Write the header. - */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); - - /* - * Write the byte count. - */ - fwrite((char *) &bytes, sizeof(unsigned long), 1, out); - - /* - * Write the property list counts. - */ - fwrite((char *) propcnt, sizeof(unsigned short), nprops, out); - - /* - * Write the property lists. - */ - for (i = 0; i < NUMPROPS; i++) { - if (proptbl[i].used > 0) - fwrite((char *) proptbl[i].ranges, sizeof(unsigned long), - proptbl[i].used, out); - } - - fclose(out); - - /***************************************************************** - * - * Generate the case mapping data. - * - *****************************************************************/ - - /* - * Open the case.dat file. - */ - sprintf(path, "%s/case.dat", opath); - if ((out = fopen(path, "wb")) == 0) - return; - - /* - * Write the case mapping tables. - */ - hdr[1] = upper_used + lower_used + title_used; - casecnt[0] = upper_used; - casecnt[1] = lower_used; - - /* - * Write the header. - */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); - - /* - * Write the upper and lower case table sizes. - */ - fwrite((char *) casecnt, sizeof(unsigned short), 2, out); - - if (upper_used > 0) - /* - * Write the upper case table. - */ - fwrite((char *) upper, sizeof(_case_t), upper_used, out); - - if (lower_used > 0) - /* - * Write the lower case table. - */ - fwrite((char *) lower, sizeof(_case_t), lower_used, out); - - if (title_used > 0) - /* - * Write the title case table. 
- */ - fwrite((char *) title, sizeof(_case_t), title_used, out); - - fclose(out); - - /***************************************************************** - * - * Generate the decomposition data. - * - *****************************************************************/ - - /* - * Fully expand all decompositions before generating the output file. - */ - expand_decomp(); - - /* - * Open the decomp.dat file. - */ - sprintf(path, "%s/decomp.dat", opath); - if ((out = fopen(path, "wb")) == 0) - return; - - hdr[1] = decomps_used; - - /* - * Write the header. - */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); - - /* - * Write a temporary byte count which will be calculated as the - * decompositions are written out. - */ - bytes = 0; - fwrite((char *) &bytes, sizeof(unsigned long), 1, out); - - if (decomps_used) { - /* - * Write the list of decomp nodes. - */ - for (i = idx = 0; i < decomps_used; i++) { - fwrite((char *) &decomps[i].code, sizeof(unsigned long), 1, out); - fwrite((char *) &idx, sizeof(unsigned long), 1, out); - idx += decomps[i].used; - } - - /* - * Write the sentinel index as the last decomp node. - */ - fwrite((char *) &idx, sizeof(unsigned long), 1, out); - - /* - * Write the decompositions themselves. - */ - for (i = 0; i < decomps_used; i++) - fwrite((char *) decomps[i].decomp, sizeof(unsigned long), - decomps[i].used, out); - - /* - * Seek back to the beginning and write the byte count. - */ - bytes = (sizeof(unsigned long) * idx) + - (sizeof(unsigned long) * ((hdr[1] << 1) + 1)); - fseek(out, sizeof(unsigned short) << 1, 0L); - fwrite((char *) &bytes, sizeof(unsigned long), 1, out); - - fclose(out); - } - - /***************************************************************** - * - * Generate the combining class data. - * - *****************************************************************/ - - /* - * Open the cmbcl.dat file. 
- */ - sprintf(path, "%s/cmbcl.dat", opath); - if ((out = fopen(path, "wb")) == 0) - return; - - /* - * Set the number of ranges used. Each range has a combining class which - * means each entry is a 3-tuple. - */ - hdr[1] = ccl_used / 3; - - /* - * Write the header. - */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); - - /* - * Write out the byte count to maintain header size. - */ - bytes = ccl_used * sizeof(unsigned long); - fwrite((char *) &bytes, sizeof(unsigned long), 1, out); - - if (ccl_used > 0) - /* - * Write the combining class ranges out. - */ - fwrite((char *) ccl, sizeof(unsigned long), ccl_used, out); - - fclose(out); - - /***************************************************************** - * - * Generate the number data. - * - *****************************************************************/ - - /* - * Open the num.dat file. - */ - sprintf(path, "%s/num.dat", opath); - if ((out = fopen(path, "wb")) == 0) - return; - - /* - * The count part of the header will be the total number of codes that - * have numbers. - */ - hdr[1] = (unsigned short) (ncodes_used << 1); - bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t)); - - /* - * Write the header. - */ - fwrite((char *) hdr, sizeof(unsigned short), 2, out); - - /* - * Write out the byte count to maintain header size. - */ - fwrite((char *) &bytes, sizeof(unsigned long), 1, out); - - /* - * Now, if number mappings exist, write them out. 
- */ - if (ncodes_used > 0) { - fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out); - fwrite((char *) nums, sizeof(_num_t), nums_used, out); - } - - fclose(out); -} - -void -#ifdef __STDC__ -main(int argc, char *argv[]) -#else -main(argc, argv) -int argc; -char *argv[]; -#endif -{ - FILE *in; - char *prog, *opath; - - if ((prog = strrchr(argv[0], '/')) != 0) - prog++; - else - prog = argv[0]; - - opath = 0; - in = stdin; - - argc--; - argv++; - - while (argc > 0) { - if (argv[0][0] == '-' && argv[0][1] == 'o') { - argc--; - argv++; - opath = argv[0]; - } else { - if (in != stdin) - fclose(in); - if ((in = fopen(argv[0], "rb")) == 0) - fprintf(stderr, "%s: unable to open ctype file %s\n", - prog, argv[0]); - else { - read_cdata(in); - fclose(in); - in = 0; - } - } - argc--; - argv++; - } - - if (opath == 0) - opath = "."; - write_cdata(opath); - - exit(0); -} diff --git a/intl/unicharutil/ucdata.c b/intl/unicharutil/ucdata.c deleted file mode 100644 index 318cac8d5869..000000000000 --- a/intl/unicharutil/ucdata.c +++ /dev/null @@ -1,1162 +0,0 @@ -/* - * Copyright 1996, 1997, 1998 Computing Research Labs, - * New Mexico State University - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT - * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef lint -#ifdef __GNUC__ -static char rcsid[] __attribute__ ((unused)) = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; -#else -static char rcsid[] = "$Id: ucdata.c,v 1.1 1999/01/08 00:19:11 ftang%netscape.com Exp $"; -#endif -#endif - -#include -#include -#include -#ifndef WIN32 -#include -#endif - -#include "ucdata.h" - -/************************************************************************** - * - * Miscellaneous types, data, and support functions. - * - **************************************************************************/ - -typedef struct { - unsigned short bom; - unsigned short cnt; - union { - unsigned long bytes; - unsigned short len[2]; - } size; -} _ucheader_t; - -/* - * A simple array of 32-bit masks for lookup. 
- */ -static unsigned long masks32[32] = { - 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, - 0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800, - 0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000, - 0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000, - 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, - 0x40000000, 0x80000000 -}; - -#define endian_short(cc) (((cc) >> 8) | (((cc) & 0xff) << 8)) -#define endian_long(cc) ((((cc) & 0xff) << 24)|((((cc) >> 8) & 0xff) << 16)|\ - ((((cc) >> 16) & 0xff) << 8)|((cc) >> 24)) - -static FILE * -#ifdef __STDC__ -_ucopenfile(char *paths, char *filename, char *mode) -#else -_ucopenfile(paths, filename, mode) -char *paths, *filename, *mode; -#endif -{ - FILE *f; - char *fp, *dp, *pp, path[BUFSIZ]; - - if (filename == 0 || *filename == 0) - return 0; - - dp = paths; - while (dp && *dp) { - pp = path; - while (*dp && *dp != ':') - *pp++ = *dp++; - *pp++ = '/'; - - fp = filename; - while (*fp) - *pp++ = *fp++; - *pp = 0; - - if ((f = fopen(path, mode)) != 0) - return f; - - if (*dp == ':') - dp++; - } - - return 0; -} - -/************************************************************************** - * - * Support for the character properties. - * - **************************************************************************/ - -static unsigned long _ucprop_size; -static unsigned short *_ucprop_offsets; -static unsigned long *_ucprop_ranges; - -static void -#ifdef __STDC__ -_ucprop_load(char *paths, int reload) -#else -_ucprop_load(paths, reload) -char *paths; -int reload; -#endif -{ - FILE *in; - unsigned long size, i; - _ucheader_t hdr; - - if (_ucprop_size > 0) { - if (!reload) - /* - * The character properties have already been loaded. - */ - return; - - /* - * Unload the current character property data in preparation for - * loading a new copy. 
Only the first array has to be deallocated - * because all the memory for the arrays is allocated as a single - * block. - */ - free((char *) _ucprop_offsets); - _ucprop_size = 0; - } - - if ((in = _ucopenfile(paths, "ctype.dat", "rb")) == 0) - return; - - /* - * Load the header. - */ - fread((char *) &hdr, sizeof(_ucheader_t), 1, in); - - if (hdr.bom == 0xfffe) { - hdr.cnt = endian_short(hdr.cnt); - hdr.size.bytes = endian_long(hdr.size.bytes); - } - - if ((_ucprop_size = hdr.cnt) == 0) { - fclose(in); - return; - } - - /* - * Allocate all the storage needed for the lookup table. - */ - _ucprop_offsets = (unsigned short *) malloc(hdr.size.bytes); - - /* - * Calculate the offset into the storage for the ranges. The offsets - * array is on a 4-byte boundary and one larger than the value provided in - * the header count field. This means the offset to the ranges must be - * calculated after aligning the count to a 4-byte boundary. - */ - if ((size = ((hdr.cnt + 1) * sizeof(unsigned short))) & 3) - size += 4 - (size & 3); - size >>= 1; - _ucprop_ranges = (unsigned long *) (_ucprop_offsets + size); - - /* - * Load the offset array. - */ - fread((char *) _ucprop_offsets, sizeof(unsigned short), size, in); - - /* - * Do an endian swap if necessary. Don't forget there is an extra node on - * the end with the final index. - */ - if (hdr.bom == 0xfffe) { - for (i = 0; i <= _ucprop_size; i++) - _ucprop_offsets[i] = endian_short(_ucprop_offsets[i]); - } - - /* - * Load the ranges. The number of elements is in the last array position - * of the offsets. - */ - fread((char *) _ucprop_ranges, sizeof(unsigned long), - _ucprop_offsets[_ucprop_size], in); - - fclose(in); - - /* - * Do an endian swap if necessary. 
- */ - if (hdr.bom == 0xfffe) { - for (i = 0; i < _ucprop_offsets[_ucprop_size]; i++) - _ucprop_ranges[i] = endian_long(_ucprop_ranges[i]); - } -} - -static void -#ifdef __STDC__ -_ucprop_unload(void) -#else -_ucprop_unload() -#endif -{ - if (_ucprop_size == 0) - return; - - /* - * Only need to free the offsets because the memory is allocated as a - * single block. - */ - free((char *) _ucprop_offsets); - _ucprop_size = 0; -} - -static int -#ifdef __STDC__ -_ucprop_lookup(unsigned long code, unsigned long n) -#else -_ucprop_lookup(code, n) -unsigned long code, n; -#endif -{ - long l, r, m; - - /* - * There is an extra node on the end of the offsets to allow this routine - * to work right. If the index is 0xffff, then there are no nodes for the - * property. - */ - if ((l = _ucprop_offsets[n]) == 0xffff) - return 0; - - /* - * Locate the next offset that is not 0xffff. The sentinel at the end of - * the array is the max index value. - */ - for (m = 1; - n + m < _ucprop_size && _ucprop_offsets[n + m] == 0xffff; m++) ; - - r = _ucprop_offsets[n + m] - 1; - - while (l <= r) { - /* - * Determine a "mid" point and adjust to make sure the mid point is at - * the beginning of a range pair. 
- */ - m = (l + r) >> 1; - m -= (m & 1); - if (code > _ucprop_ranges[m + 1]) - l = m + 2; - else if (code < _ucprop_ranges[m]) - r = m - 2; - else if (code >= _ucprop_ranges[m] && code <= _ucprop_ranges[m + 1]) - return 1; - } - return 0; -} - -int -#ifdef __STDC__ -ucisprop(unsigned long code, unsigned long mask1, unsigned long mask2) -#else -ucisprop(code, mask1, mask2) -unsigned long code, mask1, mask2; -#endif -{ - unsigned long i; - - if (mask1 == 0 && mask2 == 0) - return 0; - - for (i = 0; mask1 && i < 32; i++) { - if ((mask1 & masks32[i]) && _ucprop_lookup(code, i)) - return 1; - } - - for (i = 32; mask2 && i < _ucprop_size; i++) { - if ((mask2 & masks32[i & 31]) && _ucprop_lookup(code, i)) - return 1; - } - - return 0; -} - -/************************************************************************** - * - * Support for case mapping. - * - **************************************************************************/ - -static unsigned long _uccase_size; -static unsigned short _uccase_len[2]; -static unsigned long *_uccase_map; - -static void -#ifdef __STDC__ -_uccase_load(char *paths, int reload) -#else -_uccase_load(paths, reload) -char *paths; -int reload; -#endif -{ - FILE *in; - unsigned long i; - _ucheader_t hdr; - - if (_uccase_size > 0) { - if (!reload) - /* - * The case mappings have already been loaded. - */ - return; - - free((char *) _uccase_map); - _uccase_size = 0; - } - - if ((in = _ucopenfile(paths, "case.dat", "rb")) == 0) - return; - - /* - * Load the header. - */ - fread((char *) &hdr, sizeof(_ucheader_t), 1, in); - - if (hdr.bom == 0xfffe) { - hdr.cnt = endian_short(hdr.cnt); - hdr.size.len[0] = endian_short(hdr.size.len[0]); - hdr.size.len[1] = endian_short(hdr.size.len[1]); - } - - /* - * Set the node count and lengths of the upper and lower case mapping - * tables. 
- */ - _uccase_size = hdr.cnt * 3; - _uccase_len[0] = hdr.size.len[0] * 3; - _uccase_len[1] = hdr.size.len[1] * 3; - - _uccase_map = (unsigned long *) - malloc(_uccase_size * sizeof(unsigned long)); - - /* - * Load the case mapping table. - */ - fread((char *) _uccase_map, sizeof(unsigned long), _uccase_size, in); - - /* - * Do an endian swap if necessary. - */ - if (hdr.bom == 0xfffe) { - for (i = 0; i < _uccase_size; i++) - _uccase_map[i] = endian_long(_uccase_map[i]); - } -} - -static void -#ifdef __STDC__ -_uccase_unload(void) -#else -_uccase_unload() -#endif -{ - if (_uccase_size == 0) - return; - - free((char *) _uccase_map); - _uccase_size = 0; -} - -static unsigned long -#ifdef __STDC__ -_uccase_lookup(unsigned long code, long l, long r, int field) -#else -_uccase_lookup(code, l, r, field) -unsigned long code; -long l, r; -int field; -#endif -{ - long m; - - /* - * Do the binary search. - */ - while (l <= r) { - /* - * Determine a "mid" point and adjust to make sure the mid point is at - * the beginning of a case mapping triple. - */ - m = (l + r) >> 1; - m -= (m % 3); - if (code > _uccase_map[m]) - l = m + 3; - else if (code < _uccase_map[m]) - r = m - 3; - else if (code == _uccase_map[m]) - return _uccase_map[m + field]; - } - - return code; -} - -unsigned long -#ifdef __STDC__ -uctoupper(unsigned long code) -#else -uctoupper(code) -unsigned long code; -#endif -{ - int field; - long l, r; - - if (ucisupper(code)) - return code; - - if (ucislower(code)) { - /* - * The character is lower case. - */ - field = 1; - l = _uccase_len[0]; - r = (l + _uccase_len[1]) - 1; - } else { - /* - * The character is title case. 
- */ - field = 2; - l = _uccase_len[0] + _uccase_len[1]; - r = _uccase_size - 1; - } - return _uccase_lookup(code, l, r, field); -} - -unsigned long -#ifdef __STDC__ -uctolower(unsigned long code) -#else -uctolower(code) -unsigned long code; -#endif -{ - int field; - long l, r; - - if (ucislower(code)) - return code; - - if (ucisupper(code)) { - /* - * The character is upper case. - */ - field = 1; - l = 0; - r = _uccase_len[0] - 1; - } else { - /* - * The character is title case. - */ - field = 2; - l = _uccase_len[0] + _uccase_len[1]; - r = _uccase_size - 1; - } - return _uccase_lookup(code, l, r, field); -} - -unsigned long -#ifdef __STDC__ -uctotitle(unsigned long code) -#else -uctotitle(code) -unsigned long code; -#endif -{ - int field; - long l, r; - - if (ucistitle(code)) - return code; - - /* - * The offset will always be the same for converting to title case. - */ - field = 2; - - if (ucisupper(code)) { - /* - * The character is upper case. - */ - l = 0; - r = _uccase_len[0] - 1; - } else { - /* - * The character is lower case. - */ - l = _uccase_len[0]; - r = (l + _uccase_len[1]) - 1; - } - return _uccase_lookup(code, l, r, field); -} - -/************************************************************************** - * - * Support for decompositions. - * - **************************************************************************/ - -static unsigned long _ucdcmp_size; -static unsigned long *_ucdcmp_nodes; -static unsigned long *_ucdcmp_decomp; - -static void -#ifdef __STDC__ -_ucdcmp_load(char *paths, int reload) -#else -_ucdcmp_load(paths, reload) -char *paths; -int reload; -#endif -{ - FILE *in; - unsigned long size, i; - _ucheader_t hdr; - - if (_ucdcmp_size > 0) { - if (!reload) - /* - * The decompositions have already been loaded. - */ - return; - - free((char *) _ucdcmp_nodes); - _ucdcmp_size = 0; - } - - if ((in = _ucopenfile(paths, "decomp.dat", "rb")) == 0) - return; - - /* - * Load the header. 
- */ - fread((char *) &hdr, sizeof(_ucheader_t), 1, in); - - if (hdr.bom == 0xfffe) { - hdr.cnt = endian_short(hdr.cnt); - hdr.size.bytes = endian_long(hdr.size.bytes); - } - - _ucdcmp_size = hdr.cnt << 1; - _ucdcmp_nodes = (unsigned long *) malloc(hdr.size.bytes); - _ucdcmp_decomp = _ucdcmp_nodes + (_ucdcmp_size + 1); - - /* - * Read the decomposition data in. - */ - size = hdr.size.bytes / sizeof(unsigned long); - fread((char *) _ucdcmp_nodes, sizeof(unsigned long), size, in); - - /* - * Do an endian swap if necessary. - */ - if (hdr.bom == 0xfffe) { - for (i = 0; i < size; i++) - _ucdcmp_nodes[i] = endian_long(_ucdcmp_nodes[i]); - } -} - -static void -#ifdef __STDC__ -_ucdcmp_unload(void) -#else -_ucdcmp_unload() -#endif -{ - if (_ucdcmp_size == 0) - return; - - /* - * Only need to free the offsets because the memory is allocated as a - * single block. - */ - free((char *) _ucdcmp_nodes); - _ucdcmp_size = 0; -} - -int -#ifdef __STDC__ -ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp) -#else -ucdecomp(code, num, decomp) -unsigned long code, *num, **decomp; -#endif -{ - long l, r, m; - - l = 0; - r = _ucdcmp_nodes[_ucdcmp_size] - 1; - - while (l <= r) { - /* - * Determine a "mid" point and adjust to make sure the mid point is at - * the beginning of a code+offset pair. 
- */ - m = (l + r) >> 1; - m -= (m & 1); - if (code > _ucdcmp_nodes[m]) - l = m + 2; - else if (code < _ucdcmp_nodes[m]) - r = m - 2; - else if (code == _ucdcmp_nodes[m]) { - *num = _ucdcmp_nodes[m + 3] - _ucdcmp_nodes[m + 1]; - *decomp = &_ucdcmp_decomp[_ucdcmp_nodes[m + 1]]; - return 1; - } - } - return 0; -} - -int -#ifdef __STDC__ -ucdecomp_hangul(unsigned long code, unsigned long *num, unsigned long decomp[]) -#else -ucdecomp_hangul(code, num, decomp) -unsigned long code, *num, decomp[]; -#endif -{ - if (!ucishangul(code)) - return 0; - - code -= 0xac00; - decomp[0] = 0x1100 + (unsigned long) (code / 588); - decomp[1] = 0x1161 + (unsigned long) ((code % 588) / 28); - decomp[2] = 0x11a7 + (unsigned long) (code % 28); - *num = (decomp[2] != 0x11a7) ? 3 : 2; - - return 1; -} - -/************************************************************************** - * - * Support for combining classes. - * - **************************************************************************/ - -static unsigned long _uccmcl_size; -static unsigned long *_uccmcl_nodes; - -static void -#ifdef __STDC__ -_uccmcl_load(char *paths, int reload) -#else -_uccmcl_load(paths, reload) -char *paths; -int reload; -#endif -{ - FILE *in; - unsigned long i; - _ucheader_t hdr; - - if (_uccmcl_size > 0) { - if (!reload) - /* - * The combining classes have already been loaded. - */ - return; - - free((char *) _uccmcl_nodes); - _uccmcl_size = 0; - } - - if ((in = _ucopenfile(paths, "cmbcl.dat", "rb")) == 0) - return; - - /* - * Load the header. - */ - fread((char *) &hdr, sizeof(_ucheader_t), 1, in); - - if (hdr.bom == 0xfffe) { - hdr.cnt = endian_short(hdr.cnt); - hdr.size.bytes = endian_long(hdr.size.bytes); - } - - _uccmcl_size = hdr.cnt * 3; - _uccmcl_nodes = (unsigned long *) malloc(hdr.size.bytes); - - /* - * Read the combining classes in. - */ - fread((char *) _uccmcl_nodes, sizeof(unsigned long), _uccmcl_size, in); - - /* - * Do an endian swap if necessary. 
- */ - if (hdr.bom == 0xfffe) { - for (i = 0; i < _uccmcl_size; i++) - _uccmcl_nodes[i] = endian_long(_uccmcl_nodes[i]); - } -} - -static void -#ifdef __STDC__ -_uccmcl_unload(void) -#else -_uccmcl_unload() -#endif -{ - if (_uccmcl_size == 0) - return; - - free((char *) _uccmcl_nodes); - _uccmcl_size = 0; -} - -unsigned long -#ifdef __STDC__ -uccombining_class(unsigned long code) -#else -uccombining_class(code) -unsigned long code; -#endif -{ - long l, r, m; - - l = 0; - r = _uccmcl_size - 1; - - while (l <= r) { - m = (l + r) >> 1; - m -= (m % 3); - if (code > _uccmcl_nodes[m + 1]) - l = m + 3; - else if (code < _uccmcl_nodes[m]) - r = m - 3; - else if (code >= _uccmcl_nodes[m] && code <= _uccmcl_nodes[m + 1]) - return _uccmcl_nodes[m + 2]; - } - return 0; -} - -/************************************************************************** - * - * Support for numeric values. - * - **************************************************************************/ - -static unsigned long *_ucnum_nodes; -static unsigned long _ucnum_size; -static short *_ucnum_vals; - -static void -#ifdef __STDC__ -_ucnumb_load(char *paths, int reload) -#else -_ucnumb_load(paths, reload) -char *paths; -int reload; -#endif -{ - FILE *in; - unsigned long size, i; - _ucheader_t hdr; - - if (_ucnum_size > 0) { - if (!reload) - /* - * The numbers have already been loaded. - */ - return; - - free((char *) _ucnum_nodes); - _ucnum_size = 0; - } - - if ((in = _ucopenfile(paths, "num.dat", "rb")) == 0) - return; - - /* - * Load the header. - */ - fread((char *) &hdr, sizeof(_ucheader_t), 1, in); - - if (hdr.bom == 0xfffe) { - hdr.cnt = endian_short(hdr.cnt); - hdr.size.bytes = endian_long(hdr.size.bytes); - } - - _ucnum_size = hdr.cnt; - _ucnum_nodes = (unsigned long *) malloc(hdr.size.bytes); - _ucnum_vals = (short *) (_ucnum_nodes + _ucnum_size); - - /* - * Read the combining classes in. 
- */ - fread((char *) _ucnum_nodes, sizeof(unsigned char), hdr.size.bytes, in); - - /* - * Do an endian swap if necessary. - */ - if (hdr.bom == 0xfffe) { - for (i = 0; i < _ucnum_size; i++) - _ucnum_nodes[i] = endian_long(_ucnum_nodes[i]); - - /* - * Determine the number of values that have to be adjusted. - */ - size = (hdr.size.bytes - - (_ucnum_size * (sizeof(unsigned long) << 1))) / - sizeof(short); - - for (i = 0; i < size; i++) - _ucnum_vals[i] = endian_short(_ucnum_vals[i]); - } -} - -static void -#ifdef __STDC__ -_ucnumb_unload(void) -#else -_ucnumb_unload() -#endif -{ - if (_ucnum_size == 0) - return; - - free((char *) _ucnum_nodes); - _ucnum_size = 0; -} - -int -#ifdef __STDC__ -ucnumber_lookup(unsigned long code, struct ucnumber *num) -#else -ucnumber_lookup(code, num) -unsigned long code; -struct ucnumber *num; -#endif -{ - long l, r, m; - short *vp; - - l = 0; - r = _ucnum_size - 1; - while (l <= r) { - /* - * Determine a "mid" point and adjust to make sure the mid point is at - * the beginning of a code+offset pair. - */ - m = (l + r) >> 1; - m -= (m & 1); - if (code > _ucnum_nodes[m]) - l = m + 2; - else if (code < _ucnum_nodes[m]) - r = m - 2; - else { - vp = _ucnum_vals + _ucnum_nodes[m + 1]; - num->numerator = (int) *vp++; - num->denominator = (int) *vp; - return 1; - } - } - return 0; -} - -int -#ifdef __STDC__ -ucdigit_lookup(unsigned long code, int *digit) -#else -ucdigit_lookup(code, digit) -unsigned long code; -int *digit; -#endif -{ - long l, r, m; - short *vp; - - l = 0; - r = _ucnum_size - 1; - while (l <= r) { - /* - * Determine a "mid" point and adjust to make sure the mid point is at - * the beginning of a code+offset pair. 
- */ - m = (l + r) >> 1; - m -= (m & 1); - if (code > _ucnum_nodes[m]) - l = m + 2; - else if (code < _ucnum_nodes[m]) - r = m - 2; - else { - vp = _ucnum_vals + _ucnum_nodes[m + 1]; - if (*vp == *(vp + 1)) { - *digit = *vp; - return 1; - } - return 0; - } - } - return 0; -} - -struct ucnumber -#ifdef __STDC__ -ucgetnumber(unsigned long code) -#else -ucgetnumber(code) -unsigned long code; -#endif -{ - struct ucnumber num; - - /* - * Initialize with some arbitrary value, because the caller simply cannot - * tell for sure if the code is a number without calling the ucisnumber() - * macro before calling this function. - */ - num.numerator = num.denominator = -111; - - (void) ucnumber_lookup(code, &num); - - return num; -} - -int -#ifdef __STDC__ -ucgetdigit(unsigned long code) -#else -ucgetdigit(code) -unsigned long code; -#endif -{ - int dig; - - /* - * Initialize with some arbitrary value, because the caller simply cannot - * tell for sure if the code is a number without calling the ucisdigit() - * macro before calling this function. - */ - dig = -111; - - (void) ucdigit_lookup(code, &dig); - - return dig; -} - -/************************************************************************** - * - * Setup and cleanup routines. 
- * - **************************************************************************/ - -void -#ifdef __STDC__ -ucdata_load(char *paths, int masks) -#else -ucdata_load(paths, masks) -char *paths; -int masks; -#endif -{ - if (masks & UCDATA_CTYPE) - _ucprop_load(paths, 0); - if (masks & UCDATA_CASE) - _uccase_load(paths, 0); - if (masks & UCDATA_DECOMP) - _ucdcmp_load(paths, 0); - if (masks & UCDATA_CMBCL) - _uccmcl_load(paths, 0); - if (masks & UCDATA_NUM) - _ucnumb_load(paths, 0); -} - -void -#ifdef __STDC__ -ucdata_unload(int masks) -#else -ucdata_unload(masks) -int masks; -#endif -{ - if (masks & UCDATA_CTYPE) - _ucprop_unload(); - if (masks & UCDATA_CASE) - _uccase_unload(); - if (masks & UCDATA_DECOMP) - _ucdcmp_unload(); - if (masks & UCDATA_CMBCL) - _uccmcl_unload(); - if (masks & UCDATA_NUM) - _ucnumb_unload(); -} - -void -#ifdef __STDC__ -ucdata_reload(char *paths, int masks) -#else -ucdata_reload(paths, masks) -char *paths; -int masks; -#endif -{ - if (masks & UCDATA_CTYPE) - _ucprop_load(paths, 1); - if (masks & UCDATA_CASE) - _uccase_load(paths, 1); - if (masks & UCDATA_DECOMP) - _ucdcmp_load(paths, 1); - if (masks & UCDATA_CMBCL) - _uccmcl_load(paths, 1); - if (masks & UCDATA_NUM) - _ucnumb_load(paths, 1); -} - -#ifdef TEST - -void -#ifdef __STDC__ -main(void) -#else -main() -#endif -{ - int dig; - unsigned long i, lo, *dec; - struct ucnumber num; - - ucdata_setup("."); - - if (ucisweak(0x30)) - printf("WEAK\n"); - else - printf("NOT WEAK\n"); - - printf("LOWER 0x%04lX\n", uctolower(0xff3a)); - printf("UPPER 0x%04lX\n", uctoupper(0xff5a)); - - if (ucisalpha(0x1d5)) - printf("ALPHA\n"); - else - printf("NOT ALPHA\n"); - - if (ucisupper(0x1d5)) { - printf("UPPER\n"); - lo = uctolower(0x1d5); - printf("0x%04lx\n", lo); - lo = uctotitle(0x1d5); - printf("0x%04lx\n", lo); - } else - printf("NOT UPPER\n"); - - if (ucistitle(0x1d5)) - printf("TITLE\n"); - else - printf("NOT TITLE\n"); - - if (uciscomposite(0x1d5)) - printf("COMPOSITE\n"); - else - printf("NOT 
COMPOSITE\n"); - - if (ucdecomp(0x1d5, &lo, &dec)) { - for (i = 0; i < lo; i++) - printf("0x%04lx ", dec[i]); - putchar('\n'); - } - - if ((lo = uccombining_class(0x41)) != 0) - printf("0x41 CCL %ld\n", lo); - - if (ucisxdigit(0xfeff)) - printf("0xFEFF HEX DIGIT\n"); - else - printf("0xFEFF NOT HEX DIGIT\n"); - - if (ucisdefined(0x10000)) - printf("0x10000 DEFINED\n"); - else - printf("0x10000 NOT DEFINED\n"); - - if (ucnumber_lookup(0x30, &num)) { - if (num.numerator != num.denominator) - printf("UCNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); - else - printf("UCNUMBER: 0x30 = %d\n", num.numerator); - } else - printf("UCNUMBER: 0x30 NOT A NUMBER\n"); - - if (ucnumber_lookup(0xbc, &num)) { - if (num.numerator != num.denominator) - printf("UCNUMBER: 0xbc = %d/%d\n", num.numerator, num.denominator); - else - printf("UCNUMBER: 0xbc = %d\n", num.numerator); - } else - printf("UCNUMBER: 0xbc NOT A NUMBER\n"); - - - if (ucnumber_lookup(0xff19, &num)) { - if (num.numerator != num.denominator) - printf("UCNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); - else - printf("UCNUMBER: 0xff19 = %d\n", num.numerator); - } else - printf("UCNUMBER: 0xff19 NOT A NUMBER\n"); - - if (ucnumber_lookup(0x4e00, &num)) { - if (num.numerator != num.denominator) - printf("UCNUMBER: 0x4e00 = %d/%d\n", num.numerator, num.denominator); - else - printf("UCNUMBER: 0x4e00 = %d\n", num.numerator); - } else - printf("UCNUMBER: 0x4e00 NOT A NUMBER\n"); - - if (ucdigit_lookup(0x06f9, &dig)) - printf("UCDIGIT: 0x6f9 = %d\n", dig); - else - printf("UCDIGIT: 0x6f9 NOT A NUMBER\n"); - - dig = ucgetdigit(0x0969); - printf("UCGETDIGIT: 0x969 = %d\n", dig); - - num = ucgetnumber(0x30); - if (num.numerator != num.denominator) - printf("UCGETNUMBER: 0x30 = %d/%d\n", num.numerator, num.denominator); - else - printf("UCGETNUMBER: 0x30 = %d\n", num.numerator); - - num = ucgetnumber(0xbc); - if (num.numerator != num.denominator) - printf("UCGETNUMBER: 0xbc = %d/%d\n", num.numerator, 
num.denominator); - else - printf("UCGETNUMBER: 0xbc = %d\n", num.numerator); - - num = ucgetnumber(0xff19); - if (num.numerator != num.denominator) - printf("UCGETNUMBER: 0xff19 = %d/%d\n", num.numerator, num.denominator); - else - printf("UCGETNUMBER: 0xff19 = %d\n", num.numerator); - - ucdata_cleanup(); - exit(0); -} - -#endif /* TEST */ diff --git a/intl/unicharutil/ucdata.h b/intl/unicharutil/ucdata.h deleted file mode 100644 index 4b765cbd38f5..000000000000 --- a/intl/unicharutil/ucdata.h +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright 1996, 1997, 1998 Computing Research Labs, - * New Mexico State University - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT - * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#ifndef _h_ucdata -#define _h_ucdata - -/* - * $Id: ucdata.h,v 1.1 1999/01/08 00:19:12 ftang%netscape.com Exp $ - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#undef __ -#ifdef __STDC__ -#define __(x) x -#else -#define __(x) () -#endif - -#define UCDATA_VERSION "1.9" - -/************************************************************************** - * - * Masks and macros for character properties. - * - **************************************************************************/ - -/* - * Values that can appear in the `mask1' parameter of the ucisprop() - * function. - */ -#define UC_MN 0x00000001 /* Mark, Non-Spacing */ -#define UC_MC 0x00000002 /* Mark, Spacing Combining */ -#define UC_ME 0x00000004 /* Mark, Enclosing */ -#define UC_ND 0x00000008 /* Number, Decimal Digit */ -#define UC_NL 0x00000010 /* Number, Letter */ -#define UC_NO 0x00000020 /* Number, Other */ -#define UC_ZS 0x00000040 /* Separator, Space */ -#define UC_ZL 0x00000080 /* Separator, Line */ -#define UC_ZP 0x00000100 /* Separator, Paragraph */ -#define UC_CC 0x00000200 /* Other, Control */ -#define UC_CF 0x00000400 /* Other, Format */ -#define UC_OS 0x00000800 /* Other, Surrogate */ -#define UC_CO 0x00001000 /* Other, Private Use */ -#define UC_CN 0x00002000 /* Other, Not Assigned */ -#define UC_LU 0x00004000 /* Letter, Uppercase */ -#define UC_LL 0x00008000 /* Letter, Lowercase */ -#define UC_LT 0x00010000 /* Letter, Titlecase */ -#define UC_LM 0x00020000 /* Letter, Modifier */ -#define UC_LO 0x00040000 /* Letter, Other */ -#define UC_PC 0x00080000 /* Punctuation, Connector */ -#define UC_PD 0x00100000 /* Punctuation, Dash */ -#define UC_PS 0x00200000 /* Punctuation, Open */ -#define UC_PE 0x00400000 /* Punctuation, Close */ -#define UC_PO 0x00800000 /* Punctuation, Other */ -#define UC_SM 0x01000000 /* Symbol, Math */ -#define UC_SC 0x02000000 /* Symbol, Currency */ -#define UC_SK 0x04000000 /* Symbol, Modifier */ -#define UC_SO 0x08000000 /* Symbol, Other */ -#define UC_L 0x10000000 
/* Left-To-Right */ -#define UC_R 0x20000000 /* Right-To-Left */ -#define UC_EN 0x40000000 /* European Number */ -#define UC_ES 0x80000000 /* European Number Separator */ - -/* - * Values that can appear in the `mask2' parameter of the ucisprop() - * function. - */ -#define UC_ET 0x00000001 /* European Number Terminator */ -#define UC_AN 0x00000002 /* Arabic Number */ -#define UC_CS 0x00000004 /* Common Number Separator */ -#define UC_B 0x00000008 /* Block Separator */ -#define UC_S 0x00000010 /* Segment Separator */ -#define UC_WS 0x00000020 /* Whitespace */ -#define UC_ON 0x00000040 /* Other Neutrals */ -/* - * Implementation specific character properties. - */ -#define UC_CM 0x00000080 /* Composite */ -#define UC_NB 0x00000100 /* Non-Breaking */ -#define UC_SY 0x00000200 /* Symmetric */ -#define UC_HD 0x00000400 /* Hex Digit */ -#define UC_QM 0x00000800 /* Quote Mark */ -#define UC_MR 0x00001000 /* Mirroring */ -#define UC_SS 0x00002000 /* Space, other */ - -#define UC_CP 0x00004000 /* Defined */ - -/* - * Added for UnicodeData-2.1.3. - */ -#define UC_PI 0x00008000 /* Punctuation, Initial */ -#define UC_PF 0x00010000 /* Punctuation, Final */ - -/* - * This is the primary function for testing to see if a character has some set - * of properties. The macros that test for various character properties all - * call this function with some set of masks. 
- */ -extern int ucisprop __((unsigned long code, unsigned long mask1, - unsigned long mask2)); - -#define ucisalpha(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT, 0) -#define ucisdigit(cc) ucisprop(cc, UC_ND, 0) -#define ucisalnum(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT|UC_ND, 0) -#define uciscntrl(cc) ucisprop(cc, UC_CC|UC_CF, 0) -#define ucisspace(cc) ucisprop(cc, UC_ZS|UC_SS, 0) -#define ucisblank(cc) ucisprop(cc, UC_ZS, 0) -#define ucispunct(cc) ucisprop(cc, UC_PD|UC_PS|UC_PE|UC_PO, UC_PI|UC_PF) -#define ucisgraph(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\ - UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\ - UC_PS|UC_PE|UC_PO|UC_SM|UC_SM|UC_SC|UC_SK|\ - UC_SO, UC_PI|UC_PF) -#define ucisprint(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\ - UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\ - UC_PS|UC_PE|UC_PO|UC_SM|UC_SM|UC_SC|UC_SK|\ - UC_SO|UC_ZS, UC_PI|UC_PF) -#define ucisupper(cc) ucisprop(cc, UC_LU, 0) -#define ucislower(cc) ucisprop(cc, UC_LL, 0) -#define ucistitle(cc) ucisprop(cc, UC_LT, 0) -#define ucisxdigit(cc) ucisprop(cc, 0, UC_HD) - -#define ucisisocntrl(cc) ucisprop(cc, UC_CC, 0) -#define ucisfmtcntrl(cc) ucisprop(cc, UC_CF, 0) - -#define ucissymbol(cc) ucisprop(cc, UC_SM|UC_SC|UC_SO|UC_SK, 0) -#define ucisnumber(cc) ucisprop(cc, UC_ND|UC_NO|UC_NL, 0) -#define ucisnonspacing(cc) ucisprop(cc, UC_MN, 0) -#define ucisopenpunct(cc) ucisprop(cc, UC_PS, 0) -#define ucisclosepunct(cc) ucisprop(cc, UC_PE, 0) -#define ucisinitialpunct(cc) ucisprop(cc, 0, UC_PI) -#define ucisfinalpunct(cc) ucisprop(cc, 0, UC_PF) - -#define uciscomposite(cc) ucisprop(cc, 0, UC_CM) -#define ucishex(cc) ucisprop(cc, 0, UC_HD) -#define ucisquote(cc) ucisprop(cc, 0, UC_QM) -#define ucissymmetric(cc) ucisprop(cc, 0, UC_SY) -#define ucismirroring(cc) ucisprop(cc, 0, UC_MR) -#define ucisnonbreaking(cc) ucisprop(cc, 0, UC_NB) - -/* - * Directionality macros. 
- */ -#define ucisrtl(cc) ucisprop(cc, UC_R, 0) -#define ucisltr(cc) ucisprop(cc, UC_L, 0) -#define ucisstrong(cc) ucisprop(cc, UC_L|UC_R, 0) -#define ucisweak(cc) ucisprop(cc, UC_EN|UC_ES, UC_ET|UC_AN|UC_CS) -#define ucisneutral(cc) ucisprop(cc, 0, UC_B|UC_S|UC_WS|UC_ON) -#define ucisseparator(cc) ucisprop(cc, 0, UC_B|UC_S) - -/* - * Other macros inspired by John Cowan. - */ -#define ucismark(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME, 0) -#define ucismodif(cc) ucisprop(cc, UC_LM, 0) -#define ucisletnum(cc) ucisprop(cc, UC_NL, 0) -#define ucisconnect(cc) ucisprop(cc, UC_PC, 0) -#define ucisdash(cc) ucisprop(cc, UC_PD, 0) -#define ucismath(cc) ucisprop(cc, UC_SM, 0) -#define uciscurrency(cc) ucisprop(cc, UC_SC, 0) -#define ucismodifsymbol(cc) ucisprop(cc, UC_SK, 0) -#define ucisnsmark(cc) ucisprop(cc, UC_MN, 0) -#define ucisspmark(cc) ucisprop(cc, UC_MC, 0) -#define ucisenclosing(cc) ucisprop(cc, UC_ME, 0) -#define ucisprivate(cc) ucisprop(cc, UC_CO, 0) -#define ucissurrogate(cc) ucisprop(cc, UC_OS, 0) -#define ucislsep(cc) ucisprop(cc, UC_ZL, 0) -#define ucispsep(cc) ucisprop(cc, UC_ZP, 0) - -#define ucisidentstart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL, 0) -#define ucisidentpart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL|\ - UC_MN|UC_MC|UC_ND|UC_PC|UC_CF, 0) - -#define ucisdefined(cc) ucisprop(cc, 0, UC_CP) -#define ucisundefined(cc) !ucisprop(cc, 0, UC_CP) - -/* - * Other miscellaneous character property macros. - */ -#define ucishan(cc) (((cc) >= 0x4e00 && (cc) <= 0x9fff) ||\ - ((cc) >= 0xf900 && (cc) <= 0xfaff)) -#define ucishangul(cc) ((cc) >= 0xac00 && (cc) <= 0xd7ff) - -/************************************************************************** - * - * Functions for case conversion. 
- * - **************************************************************************/ - -extern unsigned long uctoupper __((unsigned long code)); -extern unsigned long uctolower __((unsigned long code)); -extern unsigned long uctotitle __((unsigned long code)); - -/************************************************************************** - * - * Functions for getting decompositions. - * - **************************************************************************/ - -/* - * This routine determines if the code has a decomposition. If it returns 0, - * there is no decomposition. Any other value indicates a decomposition was - * returned. - */ -extern int ucdecomp __((unsigned long code, unsigned long *num, - - unsigned long **decomp)); - -/* - * If the code is a Hangul syllable, this routine decomposes it into the array - * passed. The array size should be at least 3. - */ -extern int ucdecomp_hangul __((unsigned long code, unsigned long *num, - unsigned long decomp[])); - -/************************************************************************** - * - * Functions for getting combining classes. - * - **************************************************************************/ - -/* - * This will return the combining class for a character to be used with the - * Canonical Ordering algorithm. - */ -extern unsigned long uccombining_class __((unsigned long code)); - -/************************************************************************** - * - * Functions for getting numbers and digits. - * - **************************************************************************/ - -struct ucnumber { - int numerator; - int denominator; -}; - -extern int ucnumber_lookup __((unsigned long code, struct ucnumber *num)); -extern int ucdigit_lookup __((unsigned long code, int *digit)); - -/* - * For compatibility with John Cowan's "uctype" package. 
- */ -extern struct ucnumber ucgetnumber __((unsigned long code)); -extern int ucgetdigit __((unsigned long code)); - -/************************************************************************** - * - * Functions library initialization and cleanup. - * - **************************************************************************/ - -/* - * Macros for specifying the data tables to be loaded for ucdata_load(). - */ -#define UCDATA_CASE 0x01 -#define UCDATA_CTYPE 0x02 -#define UCDATA_DECOMP 0x04 -#define UCDATA_CMBCL 0x08 -#define UCDATA_NUM 0x10 - -#define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\ - UCDATA_CMBCL|UCDATA_NUM) - -/* - * Functions to load, unload, and reload specific data files. - */ -extern void ucdata_load __((char *paths, int mask)); -extern void ucdata_unload __((int mask)); -extern void ucdata_reload __((char *paths, int mask)); - -/* - * Deprecated functions, now just compatibility macros. - */ -#define ucdata_setup(p) ucdata_load(p, UCDATA_ALL) -#define ucdata_cleanup() ucdata_unload(UCDATA_ALL) - -#undef __ - -#ifdef __cplusplus -} -#endif - -#endif /* _h_ucdata */