Bug 724531 - Import ICU library into the mozilla tree. r=dmandelin for importing the ICU sources

--HG--
extra : rebase_source : 2561787b6f9ae3dc626cb6bf76e6f78f6cd15664
This commit is contained in:
Norbert Lindenberg 2013-03-01 20:58:49 -08:00
Родитель 5ca04f8ec9
Коммит c0b65e4a8f
3255 изменённых файлов: 1137125 добавлений и 0 удалений

Разница между файлами не показана из-за своего большого размера Загрузить разницу

10
intl/icu/SVN-INFO Normal file
Просмотреть файл

@ -0,0 +1,10 @@
Path: release-50-1-2
URL: http://source.icu-project.org/repos/icu/icu/tags/release-50-1-2
Repository Root: http://source.icu-project.org/repos/icu
Repository UUID: 251d0590-4201-4cf1-90de-194747b24ca1
Revision: 33305
Node Kind: directory
Last Changed Author: mow
Last Changed Rev: 33098
Last Changed Date: 2013-01-30 15:54:50 -0800 (Wed, 30 Jan 2013)

38
intl/icu/as_is/bomlist.py Normal file
Просмотреть файл

@ -0,0 +1,38 @@
#!/usr/bin/python
# Copyright (C) 2011 IBM Corporation and Others. All Rights Reserved.
#
# run in icu/
# will create file icu/as_is/bomlist.txt
#
# Usage:
# ( python as_is/bomlist.py > as_is/bomlist.txt ) || rm -f as_is/bomlist.txt
import os
import codecs


def list_bom_files(root="."):
    """Yield ``icu/<relative path>`` for each file under *root* starting with a UTF-8 BOM.

    *root* is the directory to scan (the script is meant to be run from
    ``icu/``, hence the default ``"."``).  Directories whose path contains
    ``/.svn`` are skipped.  Paths are reported with ``/`` separators and with
    the ``<root>/`` prefix replaced by ``icu/``, the format expected in
    ``as_is/bomlist.txt``.
    """
    bom = codecs.BOM_UTF8
    # Number of leading characters ("<root>/") to drop from each reported path.
    prefix_len = len(root) + 1
    for path, _dirs, names in os.walk(root):
        # Subversion bookkeeping files are not part of the ICU sources.
        if "/.svn" in path:
            continue
        for name in names:
            fp = path + "/" + name
            if not os.path.isfile(fp):
                continue
            # Only the first three bytes matter; "with" closes the handle
            # even if the read fails.
            with open(fp, "rb") as handle:
                head = handle.read(3)
            if head == bom:
                yield "icu/" + fp[prefix_len:]


def main():
    """Print one line per BOM-carrying file below the current directory."""
    for line in list_bom_files("."):
        # Single-argument call form works under both Python 2 and Python 3.
        print(line)


if __name__ == "__main__":
    main()

102
intl/icu/as_is/os390/unpax-icu.sh Executable file
Просмотреть файл

@ -0,0 +1,102 @@
#!/bin/sh
# Copyright (C) 2001-2010, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Steven R. Loomis
# George Rhoten
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
#
# Set the following variable to the list of binary file suffixes (extensions)
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM'
# Print a one-line reminder of the expected command line.
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
# first make sure we have at least one arg and it's a file we can read
if [ $# -eq 0 ]; then
    usage
    # Exit non-zero so callers can tell that nothing was extracted.
    exit 1
fi
tar_file=$1
# Quote the name: with an empty argument the unquoted test collapses to
# "[ ! -r ]", which is false, so the check would silently pass.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi
echo ""
echo "Extracting from $tar_file ..."
echo ""
# extract files while converting them to EBCDIC
pax -rvf "$tar_file" -o to=IBM-1047,from=ISO8859-1 -o setfiletag
echo ""
echo "Determining binary files ..."
echo ""
# Start from an empty list so that a binary_files value inherited from the
# environment can never reach the rm/pax/chtag commands later in the script.
binary_files=""
# When building in ASCII mode, text files are converted as ASCII.
# Default the flag to 0 so an unset variable does not make "-eq" fail with
# an "integer expression expected" style error.
if [ "${ICU_ENABLE_ASCII_STRINGS:-0}" -eq 1 ]; then
    binary_suffixes="$binary_suffixes txt TXT ucm UCM"
else
    for file in `find ./icu \( -name \*.txt -print \) | sed -e 's/^\.\///'`; do
        # Hex dump of the first three bytes: squeeze runs of blanks to one
        # space (the pattern needs TWO spaces before the '*', otherwise it
        # matches the empty string), then keep od's fields 2-4.
        bom8=`head -c 3 "$file"|\
              od -t x1|\
              head -n 1|\
              sed 's/  */ /g'|\
              cut -f2-4 -d ' '|\
              tr 'A-Z' 'a-z'`;
        #Find a converted UTF-8 BOM
        if [ "$bom8" = "57 8b ab" ]
        then
            binary_files="$binary_files $file";
        fi
    done
fi
# Walk the archive listing and remember every member whose extension is one
# of the binary suffixes.
for i in $(pax -f $tar_file 2>/dev/null)
do
    case $i in
    */) ;;      # then this entry is a directory
    *.*)        # then this entry has a dot in the filename
        # Take everything after the LAST dot.  This also handles names such
        # as NormalizationTest-3.2.0.txt and paths with any number of dots,
        # and it only needs to be computed once per entry.
        suf=${i##*.}
        for j in $binary_suffixes
        do
            if [ "$suf" = "$j" ]
            then
                binary_files="$binary_files $i"
                break
            fi
        done
        ;;
    *) ;;       # then this entry does not have a dot in it
    esac
done
# Re-extract the collected binary files, if there are any.
if [ ${#binary_files} -ne 0 ]; then
    echo "Restoring binary files ..."
    echo ""
    # Remove the converted copies, then extract them again without the
    # codepage conversion options.
    rm $binary_files
    pax -rvf $tar_file $binary_files
    # Tag the files as binary for proper interaction with the _BPXK_AUTOCVT
    # environment setting
    chtag -b $binary_files
else
    echo ""
    echo "There are no binary files to restore."
fi
echo ""
echo "$0 has completed extracting ICU from $tar_file."

Просмотреть файл

@ -0,0 +1,6 @@
#!/bin/sh
# /* Copyright (C) 2011-2012 IBM Corporation and Others. All Rights Reserved */
# Compile the two helper programs from the sources in the current directory
# with the icc compiler driver.
# NOTE(review): there is no error checking - the second compile runs even if
# the first one fails, and only its status is returned.
icc -o iculd iculd.c
icc -o cxxfilt cxxfilt.cpp

Просмотреть файл

@ -0,0 +1,33 @@
# Copyright (C) 2006-2011, International Business Machines Corporation
# and others. All Rights Reserved.
#
# sed edits that adapt a generated configure script to qsh on i5/OS.
# The substitutions are applied in order; later rm patterns rely on the
# earlier, more specific ones having already been rewritten.
#
# Use "test -x" instead of "test -f" most of the time,
# due to how executables are created in a different file system.
s/as_executable_p="test -f"/as_executable_p="test -x"/g
s/test -f "$ac_file"/test -x "$ac_file"/g
s/test -f $ac_dir\/install-sh/test -x $ac_dir\/install-sh/g
s/test -f $ac_dir\/install.sh/test -x $ac_dir\/install.sh/g
s/test -f $ac_dir\/shtool/test -x $ac_dir\/shtool/g
# Use the more efficient del instead of the rm command.
# Every spelling of the recursive/force flags is mapped to "del -f".
s/rm[ ]*-r[ ]*-f/del -f/g
s/rm[ ]*-f[ ]*-r/del -f/g
s/rm[ ]*-rf/del -f/g
s/rm[ ]*-fr/del -f/g
s/rm[ ]*-f/del -f/g
## Disabled: keep some .awk files around for debugging
#s/[ ]*del -f [^ ]*.awk/#&/
# Bourne shell isn't always available on i5/OS; point at qsh instead.
s/\/bin\/sh/\/usr\/bin\/qsh/g
# qsh has no diff; the equivalent used here is "cmp -s".
s/ diff / cmp -s /g
## srl
# Trouble with redirects: drop ">&$3" entirely and turn ">&$4" into ">$4".
s% >&$3%%g
s% >&$4% >$4%g
# Insert a "touch -C 819" of the awk scripts in front of the line that starts
# with "ac_cr="; the matched text itself is kept via "&".
s%^ac_cr=%# AWK reads ASCII, not EBCDIC\
touch -C 819 $tmp/defines.awk $tmp/subs.awk $tmp/subs1.awk conf$$subs.awk\
\
&%
##OBSOLETE
#(REPLACED BY CPP in runConfigureICU) Use -c qpponly instead of -E to enable the preprocessor on the compiler
#s/\$CC -E/\$CC -c -qpponly/g

Просмотреть файл

@ -0,0 +1,37 @@
/* Copyright (C) 2012 IBM Corporation and Others. All Rights Reserved */
#include <stdio.h>
#include <demangle.h>
/**
 * Demangle one symbol and print the result to stdout.
 *
 * Prints the symbol itself, any text Demangle() left unconsumed, and - for
 * member functions - the demangled text.
 *
 * @param str the (possibly mangled) symbol name, e.g. "f__1XFi"
 */
void showSym(char *str) {
  // Demangle() hands back the unconsumed tail through this variable; start
  // from NULL so a failed call cannot leave it pointing at garbage.
  char *rest = NULL;
  struct Name *name = Demangle(str, rest); // "f__1XFi"

  printf("# '%s'\n", str);
  if(rest && *rest) printf("\trest: '%s'\n", rest);

  // NOTE(review): Demangle() is documented to return NULL for input that is
  // not a mangled name - confirm against the platform's demangle.h.
  if(name == NULL) {
    printf("\t(not a mangled name)\n");
    return;
  }

  if(name->Kind() == MemberFunction) {
    //((MemberFunctionName *) name)->Scope()->Text() is "X"
    //((MemberFunctionName *) name)->RootName() is "f"
    //((MemberFunctionName *) name)->Text() is "X::f(int)"
    printf("\t=> %s\n", ((MemberFunctionName *) name)->Text());
  } else {
    printf("\t(not MemberFunction)\n");
  }
}
/**
 * Entry point: demangle every command-line argument, or print a usage line
 * when none are given.
 */
int main(int argc, /*const*/ char *argv[]) {
  // Nothing to demangle: show how to call the tool and stop.
  if(argc<=1) {
    printf("Usage: %s <symbol> ...\n", argv[0]);
    return 0;
  }

  // Handle the symbols in the order they were given.
  for(int idx=1; idx<argc; ++idx) {
    showSym(argv[idx]);
  }
  return 0;
}

Просмотреть файл

@ -0,0 +1,65 @@
#!/usr/bin/qsh
# Copyright (C) 2000-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
#
# This script detects if any UTF-8 files were incorrectly converted to EBCDIC, and
# converts them back.
# Record in QSH (and export it) whether this script runs under qsh,
# judged by whether QSH_VERSION is set to a non-empty value.
if [ -n "$QSH_VERSION" ];
then
    QSH=1
    #echo "QSH version $QSH_VERSION"
else
    QSH=0
    echo "QSH not detected (QSH_VERSION not set) - just testing."
fi
export QSH
# Archive to re-extract from: the first argument, used without validation.
tar_file=$1
echo ""
echo "Determining binary files by BOM ..."
echo ""
# bin_count is the running total of matching files; binary_files holds the
# current batch of names waiting to be re-extracted.
bin_count=0
binary_files=""
# Process BOMs
for file in `find ./icu/source/data/unidata \( -name \*.txt -print \)`; do
# Hex dump of the first three bytes, cut from columns 10-18 of od's first
# output line.
bom8=`od -t x1 -N 3 $file|\
head -n 1|\
cut -c10-18`;
#Find a converted UTF-8 BOM
echo "file $file bom /${bom8}/"
if [ "$bom8" = "57 8b ab" ]
then
# Drop the leading "./" component so the name matches the archive member.
file="`echo $file | cut -d / -f2-`"
echo "converting ${file}"
# Collect names in batches of up to 200 words before calling rm/pax.
if [ `echo $binary_files | wc -w` -lt 200 ]
then
bin_count=`expr $bin_count + 1`
binary_files="$binary_files $file";
else
# Batch is full: restore it, then start a new batch with the current file.
echo "Restoring binary files by BOM ($bin_count)..."
rm $binary_files;
pax -C 819 -rvf $tar_file $binary_files;
echo "Determining binary files by BOM ($bin_count)..."
binary_files="$file";
bin_count=`expr $bin_count + 1`
fi
fi
done
# Restore whatever is left in the final, partially filled batch.
if [ `echo $binary_files | wc -w` -gt 0 ]
then
echo restoring
rm $binary_files
pax -C 819 -rvf $tar_file $binary_files
fi

Просмотреть файл

@ -0,0 +1,249 @@
/* Copyright (C) 2011 IBM Corporation and Others. All Rights Reserved */
/**
Input:
-o makeconv makeconv.o ucnvstat.o ../../lib/libicuuc48.so -qOPTION='*DUPPROC *DUPVAR*'
CRTPGM PGM(SRLICU/MAKECONV) MODULE(SRLICU/MAKECONV SRLICU/UCNVSTAT SRLICU/GENMBCS SRLICU/GENCNVEX) BNDSRVPGM(SRLICU/LIBICUUC48 SRLICU/LIBICUTU48 SRLICU/LIBICUIN48) OPTION(*DUPPROC *DUPVAR) REPLACE(*YES)
Handles .o ( modules ), .so ( srvpgm ), .a ( bnddir ).
TODO:
- cleanup
- much better error handling
- factor common code
- instead of caring about .o vs .so vs .a, just read the link - if it ends in .srvpgm then treat it as a service program, etc.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifndef TEST_MODE
#define TEST_MODE 0
#endif
#if !TEST_MODE
#include <qp0z1170.h>
#else
/* Test-mode stand-in for the system call: echo the CL command instead of
 * running it and always report success. */
static int Qp0zSystem(const char *cmd) {
    fputs("CL: ", stdout);
    puts(cmd);   /* puts() supplies the trailing newline */
    return 0;
}
#endif
/*
 * Echo a CL command, run it through Qp0zSystem() and report the outcome.
 *
 * Returns 0 when Qp0zSystem() returned 0, and 1 otherwise.
 */
static int runcmd(const char *cmd) {
    int status;

    printf("%s\n", cmd);
    status = Qp0zSystem(cmd);

    if(status == 0) {
        printf("..ok\n");
        return 0;
    }

    /* Both failure kinds yield 1; only the message differs by sign. */
    if(status < 0) {
        fputs("..Qp0zSystem failed.\n", stdout);
    } else {
        fputs("..System call failed.\n", stdout);
    }
    return 1;
}
/*
 * Append at most maxlen characters of src to the NUL-terminated string in
 * dst, which has room for dstsize bytes in total.
 *
 * Returns 0 on success, or 1 (after printing a message) when the result
 * would not fit; dst is left unchanged in that case.
 */
static int append_bounded(char *dst, size_t dstsize, const char *src, size_t maxlen) {
    size_t used = strlen(dst);
    size_t len = strlen(src);
    if(len > maxlen) {
        len = maxlen;
    }
    if(len >= dstsize - used) {
        printf("Arguments too long, giving up at: %s\n", src);
        return 1;
    }
    memcpy(dst + used, src, len);
    dst[used + len] = 0;
    return 0;
}

/*
 * Append "<outputdir>/<first namelen characters of name> " to list.
 * Returns 0 on success, 1 if list is too small.
 */
static int append_entry(char *list, size_t listsize, const char *outputdir,
                        const char *name, size_t namelen) {
    return append_bounded(list, listsize, outputdir, strlen(outputdir))
        || append_bounded(list, listsize, "/", 1)
        || append_bounded(list, listsize, name, namelen)
        || append_bounded(list, listsize, " ", 1);
}

/*
 * Add a ".o" or ".a" input (arg, of length arglen >= 2) to list.
 *
 * If arg is a symbolic link, the entry name is the last path component of
 * the link target without its extension, e.g.
 * /qsys.lib/srlicu.lib/currtest.module -> currtest.  Otherwise the name is
 * arg without its two-character suffix, cut to at most 10 characters.
 * Returns 0 on success, 1 if list is too small.
 */
static int append_linked_entry(char *list, size_t listsize, const char *outputdir,
                               const char *arg, size_t arglen) {
    const char *name = arg;
    size_t namelen = arglen - 2;   /* drop ".o" / ".a" */
    char linkbuf[200];
    int linklen;

    if(namelen >= 10) {
        namelen = 10;
    }
    /* readlink() does not NUL-terminate, so keep one byte back and do it here. */
    linklen = (int)readlink(arg, linkbuf, sizeof(linkbuf) - 1);
    if(linklen > 0) {
        char *mend;
        linkbuf[linklen] = 0;
        mend = strrchr(linkbuf, '.');
        if(mend) {
            *mend = 0;
            mend = strrchr(linkbuf, '/');
            if(mend) {
                /* Point straight into linkbuf; no second fixed-size copy. */
                name = mend + 1;
                namelen = strlen(name);
            }
        }
    }
    return append_entry(list, listsize, outputdir, name, namelen);
}

/*
 * Translate a Unix-style link command line into a CRTPGM command, run it,
 * and create a symbolic link from the requested output name to the program
 * object.
 *
 * Recognised arguments: -o <program>, -qOPTION=<text>, inputs ending in
 * .o (modules), .a (binding directories) and .so (service programs);
 * -O*, -g*, -l* and -v are reported and ignored.  The target library name
 * is read from the OUTPUTDIR environment variable.
 *
 * Returns 0 on success and 1 on any error.
 */
int main(int argc, const char *argv[]) {
    int i;
    int written;
    char buf[8048];
    char opt[4100];
    char objs[4024];
    char libs[4024];
    char bnddirs[4024];
    const char *prog="";
    const char *progshort=prog;
    const char *outputdir=getenv("OUTPUTDIR");

    /* Echo the invocation; never pass a NULL pointer to %s. */
    printf("# OUTPUTDIR=%s ", outputdir ? outputdir : "(null)");
    for(i=0;i<argc;i++) {
        printf("%s ", argv[i]);
    }
    printf("\n");

    if(!outputdir) {
        printf("OUTPUTDIR environment variable is not set.\n");
        return 1;
    }

    buf[0]=0;
    opt[0]=0;
    objs[0]=0;
    libs[0]=0;
    bnddirs[0]=0;

    for(i=1;i<argc;i++) {
        if(argv[i][0]=='-') {
            switch(argv[i][1]) {
            case 'O':
                printf(".. ignoring optimization: %s\n", argv[i]);
                break;
            case 'g':
                printf(".. ignoring debugging: %s\n", argv[i]);
                break;
            case 'l':
                printf(".. ignoring lib: %s\n", argv[i]);
                break;
            case 'v':
                printf(".. already verbose\n");
                break;
            case 'o':
                i++;
                /* "-o" as the last argument would otherwise read argv[argc]. */
                if(i>=argc) {
                    printf("Missing program name after -o\n");
                    return 1;
                }
                prog=argv[i];
                progshort=strrchr(prog,'/');
                if(!progshort) {
                    progshort=prog;
                } else {
                    progshort++; /* / */
                }
                break;
            case 'q':
                if(!strncmp(argv[i]+2,"OPTION=",7)) {
                    if(append_bounded(opt, sizeof(opt), argv[i]+9, strlen(argv[i]+9))) {
                        return 1;
                    }
                } else {
                    printf("Unknown -q option: %s\n", argv[i]);
                    return 1;
                }
                break;
            default:
                printf("Unknown option: %s\n", argv[i]);
                return 1;
            }
        } else {
            /* The length guards keep the suffix checks inside the string. */
            size_t n = strlen(argv[i]);
            if(n>=2 &&
               argv[i][n-1]=='o' &&
               argv[i][n-2]=='.') {
                if(append_linked_entry(objs, sizeof(objs), outputdir, argv[i], n)) {
                    return 1;
                }
            } else if(n>=2 &&
                      argv[i][n-1]=='a' &&
                      argv[i][n-2]=='.') {
                if(append_linked_entry(bnddirs, sizeof(bnddirs), outputdir, argv[i], n)) {
                    return 1;
                }
            } else if(n>=3 &&
                      argv[i][n-1]=='o' &&
                      argv[i][n-2]=='s' &&
                      argv[i][n-3]=='.') {
                const char *p = strrchr(argv[i],'/');
                if(!p) {
                    printf("Can't find trailing slash in %s\n", argv[i]);
                    return 1;
                }
                /* p is "/name.so": skip the slash, drop the ".so" suffix. */
                if(append_entry(libs, sizeof(libs), outputdir, p+1, strlen(p)-4)) {
                    return 1;
                }
            } else {
                printf("Unknown input file: %s\n", argv[i]);
                return 1;
            }
        }
    }

    if(prog[0]==0) {
        printf("no program (-o) option specified.\n");
        return 1;
    }

    /* The four lists together can exceed buf, so check for truncation. */
    written = snprintf(buf, sizeof(buf),
                       "CRTPGM PGM(%s/%s) MODULE(%s) BNDSRVPGM(%s) BNDDIR(%s) OPTION(%s) REPLACE(*YES)",
                       outputdir,progshort,
                       objs,
                       libs,
                       bnddirs,
                       opt);
    if(written < 0 || (size_t)written >= sizeof(buf)) {
        printf("CRTPGM command too long.\n");
        return 1;
    }

    if(runcmd(buf)) {
        return 1;
    }

    /* -- OK */
    {
        char path1[1000];
        written = snprintf(path1, sizeof(path1), "/qsys.lib/%s.lib/%s.pgm",
                           outputdir,
                           progshort);
        if(written < 0 || (size_t)written >= sizeof(path1)) {
            printf("Program path too long.\n");
            return 1;
        }
        printf("# ln -s %s %s\n", path1, prog);
        if((!TEST_MODE) && symlink(path1,prog)) {
            /* Save errno first: perror() is allowed to change it. */
            int err = errno;
            perror("symlink");
            if(err!=EEXIST) { /* an already existing link is ignored */
                return 1;
            }
        }
    }
    return 0;
}

160
intl/icu/as_is/os400/unpax-icu.sh Executable file
Просмотреть файл

@ -0,0 +1,160 @@
#!/usr/bin/qsh
# Copyright (C) 2000-2011, International Business Machines
# Corporation and others. All Rights Reserved.
#
# Authors:
# Ami Fixler
# Barry Novinger
# Steven R. Loomis
# George Rhoten
# Jason Spieth
#
# Shell script to unpax ICU and convert the files to an EBCDIC codepage.
# After extracting to EBCDIC, binary files are re-extracted without the
# EBCDIC conversion, thus restoring them to original codepage.
# Record in QSH (and export it) whether this script runs under qsh,
# judged by whether QSH_VERSION is set to a non-empty value.
if [ -n "$QSH_VERSION" ];
then
    QSH=1
    #echo "QSH version $QSH_VERSION"
else
    QSH=0
    echo "QSH not detected (QSH_VERSION not set) - just testing."
fi
export QSH
# Set the following variable to the list of binary file suffixes (extensions)
#****************************************************************************
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
#ICU specific binary files
#****************************************************************************
# File extensions (lower and upper case) of files that are re-extracted
# as CCSID 819 by the special-paths step of this script.
binary_suffixes='brk BRK bin BIN res RES cnv CNV dat DAT icu ICU spp SPP xml XML nrm NRM'
# Archive patterns for the data directories that this script extracts
# as CCSID 37 (it copies the list into ebcdic_data).
data_files='icu/source/data/brkitr/* icu/source/data/locales/* icu/source/data/coll/* icu/source/data/rbnf/* icu/source/data/mappings/* icu/source/data/misc/* icu/source/data/translit/* icu/source/data/unidata/* icu/source/test/testdata/*'
#****************************************************************************
# Function: usage
# Description: Prints out text that describes how to call this script
# Input: None
# Output: None
#****************************************************************************
usage()
{
    echo "Enter archive filename as a parameter: $0 icu-archive.tar"
}
#****************************************************************************
# first make sure we have at least one arg and it's a file we can read
#****************************************************************************
# check for no arguments
if [ $# -eq 0 ]; then
    usage
    # Exit non-zero so callers can tell that nothing was extracted.
    exit 1
fi
# tar file is argument 1
tar_file=$1
# check that the file is valid
# Quote the name: with an empty argument the unquoted test collapses to
# "[ ! -r ]", which is false, so the check would silently pass.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi
# treat all data files as ebcdic
ebcdic_data=$data_files
#****************************************************************************
# Extract files. We do this in two passes. One pass for 819 files and a
# second pass for 37 files
#****************************************************************************
echo ""
echo "Extracting from $tar_file ..."
echo ""
# Pass 1: extract everything as iso-8859-1 except these directories.
# The -c flag inverts the pattern match, so the members matching
# $ebcdic_data are the ones left out here.
pax -C 819 -rcvf $tar_file $ebcdic_data
# Pass 2: extract the data files while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -rvf $tar_file $ebcdic_data
#****************************************************************************
# For files we have restored as CCSID 37, check the BOM to see if they
# should be processed as 819. Also handle files with special paths. Files
# that match will be added to binary files lists. The lists will in turn
# be processed to restore files as 819.
#****************************************************************************
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Start with an empty batch so that a binary_files value inherited from the
# environment can never reach the rm/pax commands of this step.
binary_files=""
# Process BOMs
if [ -f icu/as_is/bomlist.txt ];
then
    # A precomputed list exists: restore exactly those files as CCSID 819.
    echo "Using icu/as_is/bomlist.txt"
    pax -C 819 -rvf $tar_file `cat icu/as_is/bomlist.txt`
else
    for file in `find ./icu \( -name \*.txt -print \)`; do
        # Hex dump of the start of the first line: squeeze runs of blanks to
        # one space (the pattern needs TWO spaces before the '*', otherwise
        # it matches the empty string), then keep od's fields 2-4.
        bom8=`head -n 1 $file|\
              od -t x1|\
              head -n 1|\
              sed 's/  */ /g'|\
              cut -f2-4 -d ' '|\
              tr 'A-Z' 'a-z'`;
        #Find a converted UTF-8 BOM
        if [ "$bom8" = "057 08b 0ab" -o "$bom8" = "57 8b ab" ]
        then
            # Drop the leading "./" so the name matches the archive member.
            file="`echo $file | cut -d / -f2-`"
            # Collect names in batches of up to 200 before calling rm/pax.
            if [ `echo $binary_files | wc -w` -lt 200 ]
            then
                bin_count=`expr $bin_count + 1`
                binary_files="$binary_files $file";
            else
                # Batch is full: restore it, then start a new batch with
                # the current file.
                echo "Restoring binary files by BOM ($bin_count)..."
                rm $binary_files;
                pax -C 819 -rvf $tar_file $binary_files;
                echo "Determining binary files by BOM ($bin_count)..."
                binary_files="$file";
                bin_count=`expr $bin_count + 1`
            fi
        fi
    done
    # now see if a re-extract of binary files is necessary
    if [ `echo $binary_files | wc -w` -gt 0 ]
    then
        echo "Restoring binary files ($bin_count) ..."
        rm $binary_files
        pax -C 819 -rvf $tar_file $binary_files
    fi
fi
echo "# Processing special paths."
# Process special paths
# Build one find expression from $binary_suffixes: the sed call turns every
# suffix into an "-o -name *.<suffix>" clause.
# NOTE(review): '*.zzz' appears to serve only as the first operand for that
# "-o" chain - confirm.
more_bin_files=$(find icu -type f \( -name '*.zzz' `echo $binary_suffixes | sed -e 's%[a-zA-Z]*%-o -name \*.&%g'` \) -print)
echo "Restoring binary files by special paths ($bin_count) ..."
# Remove the converted copies, then extract them again as CCSID 819.
# NOTE(review): bin_count is not updated for the files handled here.
rm $more_bin_files
pax -C 819 -rvf $tar_file $more_bin_files
#****************************************************************************
# Generate and run the configure script
#****************************************************************************
echo ""
echo "Generating qsh compatible configure ..."
echo ""
# Rewrite configure through the sed script into a temporary file, then
# replace the original with the result and make it executable.
sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp
del -f icu/source/configure
mv icu/source/configureTemp icu/source/configure
chmod 755 icu/source/configure
echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."

472
intl/icu/icu4c.css Normal file
Просмотреть файл

@ -0,0 +1,472 @@
/*
* Default CSS style sheet for the ICU4C Open Source readme
* Copyright (C) 2005-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*/
/* Global styles */
body,p,li,ol,ul,th,td {
font-size: 1em;
font-family: "Arial", "Helvetica", sans-serif;
}
body {
margin: 1em;
}
body.draft {
background-image: url(images/draftbg.png);
}
.mainbody {
padding: 1em;
}
/*
* Customize the headers to have less space around them than usual
*/
h1 {
    margin-bottom: .5em;
    margin-top: .5em;
    padding-bottom: .5em;
    padding-top: .5em;
    font-family: Georgia, "Times New Roman", Times, serif;
    border-width: 2px;
    border-style: solid;
    text-align: center;
    width: 100%;
    /* One size and one weight only: the earlier "20pt" and "700"
       declarations were always overridden by these two. */
    font-size: 200%;
    font-weight: bold;
}
h2 {
border-top: 2px solid #22d;
border-left: 2px solid #22d;
margin-bottom: 0.5em;
padding-left: 4px;
margin-top: 12pt;
font-weight: 700;
font-size: 2em;
font-family: Georgia, "Times New Roman", Times, serif;
background-color: #eee;
page-break-before: always;
}
h2 a {
text-decoration: none;
color: black;
}
h2 a:hover {
color: blue;
text-decoration: underline;
}
h3 {
    border-top: 1px solid gray;
    color: #1e1c46;
    margin-bottom: 0pt;
    padding-left: 0;
    margin-left: 1em;
    /* The earlier "margin-top: 12pt" was always overridden by this value. */
    margin-top: 0.2em;
    padding-bottom: 0.4em;
    font-size: 1.5em;
    font-family: Georgia, "Times New Roman", Times, serif;
}
h3 a {
text-decoration: none;
color: black;
}
h3 a:hover {
color: blue;
text-decoration: underline;
}
h4 {
margin-left: 1.5em;
margin-bottom: 0pt;
margin-top: 12pt;
font-size: 1.0em;
font-weight: bolder;
font-family: Georgia, "Times New Roman", Times, serif;
}
h4 a {
text-decoration: none;
color: black;
}
h4 a:hover {
color: blue;
text-decoration: underline;
}
h5, h6 {
margin-left: 1.8em;
margin-bottom: 0pt;
margin-top: 12pt;
padding-left: 0.75em;
font-size: x-small;
font-family: Georgia, "Times New Roman", Times, serif;
}
p,pre,table,ul,ol,dl {
margin-left: 2em;
}
/*
* Navigation sidebar on the left hand of most pages
*/
td.sidebar1 {
background-color: #99CCFF;
font-weight: 700;
margin-top: 0px;
margin-bottom: 0px;
padding-top: 1em;
padding-left: 0.2em;
white-space: nowrap;
}
td.sidebar2 {
background-color: #99CCFF;
margin-top: 0px;
margin-bottom: 0px;
margin-left: 0px;
padding-top: 1px;
padding-bottom: 1px;
padding-left: 1px;
padding-right: 0.5em;
white-space: nowrap;
text-decoration: none;
display: block;
}
td.sidebar2:hover {
background-color: #EEEEFF;
padding-top: 1px;
padding-bottom: 1px;
padding-left: 1px;
padding-right: 0.5em;
}
a.sidebar2 {
text-decoration: none;
display: block;
width: 100%;
}
a.sidebar2:link {
color: #000099;
display: block;
}
a.sidebar2:hover {
background-color: #EEEEFF;
display: block;
}
.underlinehover:hover {
background-color: #EEEEFF;
text-decoration: underline;
}
/* This is the faded header at the top */
td.fadedtop {
background-color: #006699;
background-image: url(http://www.icu-project.org/images/gr100.gif);
}
/* Related site on the left */
p.relatedsite {
color: White;
font-weight: 700;
font-size: 10pt;
margin-top: 1em;
margin-bottom: 0;
padding-left: 0.2em;
white-space: nowrap;
}
/* Related site on the left */
p.sidebar3 {
margin-top: 0.75em;
margin-bottom: 0;
padding-left: 0.8em;
}
a.sidebar3 {
font-size: 0.9em;
text-decoration: none;
}
a.sidebar3:link {
text-decoration: none;
color: White;
}
a.sidebar3:hover {
text-decoration: underline;
}
/* FAQ */
li.faq_contents {
font-weight: 500;
}
p.faq_q {
font-weight: 700;
margin-bottom: 0px;
}
p.faq_a {
margin-top: 0px;
}
/* News items */
table.newsItem {
padding-left: 1em;
padding-right: 1em;
border-width: medium;
}
th.newsItem {
background-color: #666666;
color: White;
}
td.newsItem {
background-color: #CCCCCC;
}
td.release-line,th.release-line {
padding-left: 0.5em;
padding-right: 0.5em;
white-space: nowrap;
border: 1px;
}
.note {
font-style: italic;
font-size: small;
margin-left: 1em;
}
samp {
margin-left: 1em;
margin-right: 2em;
border-style: groove;
padding: 1em;
display: block;
background-color: #EEEEEE
}
table.rtable caption {
margin-left: 2px;
margin-right: 2px;
padding: 3px;
font-weight: bold;
background-color: #dee2ff;
text-align: left;
}
table.rtable tr th {
background-color: #dee2ff;
text-align: left;
}
table.rtable tr td {
background-color: #c0c0fd;
padding: 3px;
}
table.rtable tr.broken td {
background-color: #fbb;
border: 1px dashed gray;
padding: 3px;
font-weight: bold;
}
table.rtable tr.rarely td {
background-color: #efe9c2;
padding: 3px;
font-style: italic;
}
/* APIChangeReport specific things */
.row0 {
background-color: white;
}
.row1 {
background-color: #dfd;
}
.verchange {
color: red;
font-weight: bold;
font-size: large;
}
.stabchange {
color: red;
font-size: large;
}
.bigwarn {
    color: red;
    background-color: white;
    font-size: large;
    /* No space is allowed between number and unit; "0.5 em" is invalid
       and the whole declaration was being dropped. */
    margin: 0.5em;
}
td.bornstable {
}
td.bornstable .bigwarn {
font-size: small;
white-space: nowrap;
}
table.genTable {
border-collapse: collapse;
border: 1px solid black;
}
/* every cell, including the version column */
/* Class selectors are case-sensitive outside quirks mode, so the name must
   be spelled "genTable" like the other genTable rules in this file. */
table.genTable td {
    border: 1px solid gray;
    padding: 0.25em;
    font-size: small;
}
/* not version */
table.genTable td.file,
table.genTable td.proto {
border: none;
font-size: medium;
}
table.genTable td.file {
font-family: monospace;
font-weight: bold;
}
div.other .row0 {
background-color: white;
}
div.other .row1 {
background-color: #ddf;
}
table.docTable {
border-collapse: collapse;
border: 1px solid black;
}
/* 'everything inc version */
table.docTable td,
table.docTable th {
border: 1px solid gray;
padding: 0.25em;
font-size: small;
}
/* not version */
table.docTable td.file,
table.docTable td.proto {
border: none;
font-size: medium;
}
table.docTable td.file {
font-family: monospace;
font-weight: bold;
}
abbr {
border-bottom: 1px dashed #0B0;
}
h2.TOC {
page-break-before: auto;
}
body.readme {
}
caption {
font-weight: bold;
text-align: left
}
div.indent {
margin-left: 2em
}
ul.TOC {
list-style-type: none;
padding-left: 1em;
font-size: larger;
}
ul.TOC li a {
font-weight: bold;
}
ul.TOC li ul li a {
font-weight: normal;
list-style-type: none;
font-size: small;
}
ul.TOC li ul {
margin-left: 0;
padding-left: 2em;
font-weight: normal;
list-style-type: none;
}
pre.samp,samp {
margin-left: 1em;
border-style: groove;
padding: 1em;
display: block;
background-color: #EEEEEE
}
td.proto {
font-size: smaller;
}
@media print {
div#toc {
display: none;
}
table,tr,td,div {
page-break-inside: auto;
}
}

307
intl/icu/license.html Normal file
Просмотреть файл

@ -0,0 +1,307 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
<title>ICU License - ICU 1.8.1 and later</title>
</head>
<body BGCOLOR="#ffffff">
<h2>ICU License - ICU 1.8.1 and later</h2>
<p>COPYRIGHT AND PERMISSION NOTICE</p>
<p>
Copyright (c) 1995-2012 International Business Machines Corporation and others
</p>
<p>
All rights reserved.
</p>
<p>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies
of the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.
</p>
<p>
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL
THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM,
OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE
USE OR PERFORMANCE OF THIS SOFTWARE.
</p>
<p>
Except as contained in this notice, the name of a copyright holder shall not be
used in advertising or otherwise to promote the sale, use or other dealings in
this Software without prior written authorization of the copyright holder.
</p>
<hr style="color:gray;background-color:gray">
<p><small>
All trademarks and registered trademarks mentioned herein are the property of their respective owners.
</small></p>
<hr style="height:3px;color:black;background-color:black">
<h2>Third-Party Software Licenses</h2>
This section contains third-party software notices and/or additional terms for licensed
third-party software components included within ICU libraries.
<h3>1. Unicode Data Files and Software</h3>
<h3 align="center"><a name="Exhibit1">EXHIBIT 1</a><br>
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE</h3>
<blockquote>
<p>Unicode Data Files include all data files under the directories
<a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>,
<a href="http://www.unicode.org/reports/">http://www.unicode.org/reports/</a>,
and
<a title="http://www.unicode.org/cldr/data/" onClick="return top.js.OpenExtLink(window,event,this)" target="_blank" href="http://www.unicode.org/cldr/data/">
http://www.unicode.org/cldr/data/</a>. Unicode Data Files do not include PDF online code charts under the directory <a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>. Software includes any source code
published in the Unicode Standard or under the directories <a href="http://www.unicode.org/Public/">http://www.unicode.org/Public/</a>,
<a href="http://www.unicode.org/reports/">http://www.unicode.org/reports/</a>,
and
<a title="http://www.unicode.org/cldr/data/" onClick="return top.js.OpenExtLink(window,event,this)" target="_blank" href="http://www.unicode.org/cldr/data/">
http://www.unicode.org/cldr/data/</a>.</p>
<p>NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.</p>
<p>COPYRIGHT AND PERMISSION NOTICE</p>
<p>Copyright © 1991-2012 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in
<a href="http://www.unicode.org/copyright.html">http://www.unicode.org/copyright.html</a>.</p>
<p>Permission is hereby granted, free of charge, to any person obtaining a copy of the Unicode data files and
any associated documentation (the "Data Files") or Unicode software and any associated documentation (the "Software") to deal in the Data Files or Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Data Files or Software, and to permit persons to whom the Data Files or Software are furnished to do so, provided that (a) the above copyright notice(s) and this permission notice appear
with all copies of the Data Files or Software, (b) both the above copyright notice(s) and this permission notice appear in associated documentation, and (c) there is clear notice in each modified Data File or in the Software as well as in the documentation associated with the Data File(s) or Software that the data or software has been modified.</p>
<p>THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.</p>
<p>Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder.</p>
<hr width="80%">
<p>Unicode and the Unicode logo are trademarks of Unicode, Inc. in the United States and other countries. All third party trademarks referenced herein are the property of their respective owners.</p>
</blockquote>
<h3>2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)</h3>
<pre>
# The Google Chrome software developed by Google is licensed under the BSD license. Other software included in this distribution is provided under other licenses, as set forth below.
#
# The BSD License
# http://opensource.org/licenses/bsd-license.php
# Copyright (C) 2006-2008, Google Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
# Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# The word list in cjdict.txt are generated by combining three word lists listed
# below with further processing for compound word breaking. The frequency is generated
# with an iterative training against Google web corpora.
#
# * Libtabe (Chinese)
# - https://sourceforge.net/project/?group_id=1519
# - Its license terms and conditions are shown below.
#
# * IPADIC (Japanese)
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
# - Its license terms and conditions are shown below.
#
# ---------COPYING.libtabe ---- BEGIN--------------------
#
# /*
# * Copyrighy (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the TaBE Project nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# /*
# * Copyright (c) 1999 Computer Systems and Communication Lab,
# * Institute of Information Science, Academia Sinica.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the Computer Systems and Communication Lab
# * nor the names of its contributors may be used to endorse or
# * promote products derived from this software without specific
# * prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
#
# ---------------COPYING.libtabe-----END------------------------------------
#
#
# ---------------COPYING.ipadic-----BEGIN------------------------------------
#
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
# and Technology. All Rights Reserved.
#
# Use, reproduction, and distribution of this software is permitted.
# Any copy of this software, whether in its original form or modified,
# must include both the above copyright notice and the following
# paragraphs.
#
# Nara Institute of Science and Technology (NAIST),
# the copyright holders, disclaims all warranties with regard to this
# software, including all implied warranties of merchantability and
# fitness, in no event shall NAIST be liable for
# any special, indirect or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortuous action, arising out
# of or in connection with the use or performance of this software.
#
# A large portion of the dictionary entries
# originate from ICOT Free Software. The following conditions for ICOT
# Free Software applies to the current dictionary as well.
#
# Each User may also freely distribute the Program, whether in its
# original form or modified, to any third party or parties, PROVIDED
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
# on, or be attached to, the Program, which is distributed substantially
# in the same form as set out herein and that such intended
# distribution, if actually made, will neither violate or otherwise
# contravene any of the laws and regulations of the countries having
# jurisdiction over the User or the intended distribution itself.
#
# NO WARRANTY
#
# The program was produced on an experimental basis in the course of the
# research and development conducted during the project and is provided
# to users as so produced on an experimental basis. Accordingly, the
# program is provided without any warranty whatsoever, whether express,
# implied, statutory or otherwise. The term "warranty" used herein
# includes, but is not limited to, any warranty of the quality,
# performance, merchantability and fitness for a particular purpose of
# the program and the nonexistence of any infringement or violation of
# any right of any third party.
#
# Each user of the program will agree and understand, and be deemed to
# have agreed and understood, that there is no warranty whatsoever for
# the program and, accordingly, the entire risk arising from or
# otherwise connected with the program is assumed by the user.
#
# Therefore, neither ICOT, the copyright holder, or any other
# organization that participated in or was otherwise related to the
# development of the program and their respective officials, directors,
# officers and other employees shall be held liable for any and all
# damages, including, without limitation, general, special, incidental
# and consequential damages, arising out of or otherwise in connection
# with the use or inability to use the program or any product, material
# or result produced or otherwise obtained by using the program,
# regardless of whether they have been advised of, or otherwise had
# knowledge of, the possibility of such damages at any time during the
# project or thereafter. Each user will be deemed to have agreed to the
# foregoing by his or her commencement of use of the program. The term
# "use" as used herein includes, but is not limited to, the use,
# modification, copying and distribution of the program and the
# production of secondary products from the program.
#
# In the case where the program, whether in its original form or
# modified, was distributed or delivered to or received by a user from
# any person, organization or entity other than ICOT, unless it makes or
# grants independently of ICOT any specific warranty to the user in
# writing, such person, organization or entity, will also be exempted
# from and not be held liable to the user for any such damages as noted
# above as far as the program is concerned.
#
# ---------------COPYING.ipadic-----END------------------------------------
</pre>
<h3>3. Time Zone Database</h3>
<p>ICU uses the public domain data and code derived from <a href="http://www.iana.org/time-zones">
Time Zone Database</a> for its time zone support. The ownership of the TZ database is explained
in <a href="http://tools.ietf.org/html/rfc6557">BCP 175: Procedure for Maintaining the Time Zone
Database</a> section 7.</p>
<pre>
7. Database Ownership
The TZ database itself is not an IETF Contribution or an IETF
document. Rather it is a pre-existing and regularly updated work
that is in the public domain, and is intended to remain in the public
domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do not apply
to the TZ Database or contributions that individuals make to it.
Should any claims be made and substantiated against the TZ Database,
the organization that is providing the IANA Considerations defined in
this RFC, under the memorandum of understanding with the IETF,
currently ICANN, may act in accordance with all competent court
orders. No ownership claims will be made by ICANN or the IETF Trust
on the database or the code. Any person making a contribution to the
database or code waives all rights to future claims in that
contribution or in the TZ Database.
</pre>
</body>
</html>

159
intl/icu/packaging/PACKAGES Normal file
Просмотреть файл

@ -0,0 +1,159 @@
Copyright (C) 2000-2003, International Business Machines
Corporation and others. All Rights Reserved.
ICU is packaged into a number of small, interdependent packages. This
file describes what these packages are, what their name should be
like, and what their contents are. It is useful as a reference and a
guide when packaging ICU on a new system.
+ List of ICU packages.
ICU is distributed as the following packages:
- ICU libraries. This package contains the runtime libraries needed by
applications that use ICU. All the other packages require this package
to be installed.
- ICU. This package contains the converters data, the timezones data,
and all the ICU tools.
- ICU locales. This package adds locales and break data.
- ICU development. This package contains the files necessary to build
applications that use ICU, i.e. header files, links to shared
libraries used by the linker, static libraries, etc... It also
contains sample applications and documentation.
- ICU docs. This package contains further documentation for ICU,
including a complete API reference.
- ICU data. This package contains the source for the compiled data
contained by the ICU package.
- ICU international data. This package contains the source for the
compiled data contained by the ICU locales package.
In this file, we will refer to Autoconf variables as in $(bindir). In
addition to these, we will use the following variables to denote
ICU-specific directories or information:
VERSION ICU's dotted version number, e.g. 1.6.0.1 as of this
writing.
ICUDATADIR The directory where portable ICU data are. This is
defined as $(datadir)/icu/$(VERSION).
ICULIBDIR The directory where platform-specific ICU data
are. This is defined as $(libdir)/icu/$(VERSION).
ICUSYSCONFDIR The directory where ICU configuration files are. This
is defined as $(sysconfdir)/icu.
When referring to libraries, .so will be used to denote the extension
of a shared library, and .a to denote the extension of a static
library. These extensions will actually be different on some platforms.
+ Configuration and compilation of ICU
ICU should be configured with the following options:
--with-data-packaging=files
--disable-rpath
--enable-shared
--enable-static
--without-samples
in addition to platform-specific settings (like a specific mandir or
sysconfdir). Note that the use of --disable-rpath assumes that the
packaging is made for a standard location, or that the package
installation/deinstallation will correctly manage the configuration
of the system's dynamic loader. This is the right way of doing things.
The configure script invocation should also be done with
CFLAGS="-O2"
set, as in:
$ CFLAGS="-O2" ./configure ...
The files packaging mode is chosen because it offers the maximum
flexibility. Packages can be split easily, and system administrators
can add converters, aliases, and other resources with little
effort. Ideally, the ICU build will be modified to allow for distributing a
libicudata.so with all the converters and locales, but indexes and aliases
as separate files. But for now, this is the easiest way to get started.
+ The ICU libraries package
The ICU libraries package is typically named `libicuXX' where XX is
the major number of ICU's libraries. This number is ICU's version
number multiplied by 10 and rounded down to the nearest integer (it is
also the value of the LIB_VERSION_MAJOR configure substitution
variable). For example, for ICU 1.6.0.1, it is 16, so the package name
is `libicu16'. The major version is part of the package name to allow
for the simultaneous installation of different ICU releases.
This package contains:
- All the shared libraries, and their major number symbolic link, but
not the .so symbolic link that is only used at link time (this one is
part of the development package). These are $(libdir)/libicu*.so.* and
$(libdir)/libustdio.so.* at the time of this writing.
+ The ICU package
The ICU package is simply named `icu'. It provides data used by the ICU
libraries package and commands to create and manipulate that data.
This package contains:
- The Unicode data files (uprops.dat and unames.dat as of this writing).
- The time zones data files (tz.dat).
- All the binary data files for converters (.cnv files).
- All the ICU commands.
- The manual pages for ICU commands and file formats.
+ The ICU locales package
The ICU locales package is named `icu-locales'. It provides data used by
internationalization support in ICU.
This package contains:
- All the data for locales in ICU (.dat files).
- All the break data for specific locales (.brk files).
+ The ICU development package
The ICU development package is named `libicu-dev'. It provides all
the files necessary to write applications that use ICU, along with
examples and some documentation.
This package contains:
- The /usr/include/unicode directory which contains all the ICU
headers.
- The .so symbolic links used by the linker to link against the
latest version of the libraries.
- A sample Makefile fragment that can be included by applications
using ICU, to facilitate their building, along with a platform-specific
configuration file included by this fragment.
- The sample applications from the ICU source tree, in an appropriate
location for the system that the package is installed on (for example,
on Debian, in /usr/share/doc/libicu-dev/examples).
This package depends on the ICU libraries package with the exact same
version, since it provides .so symbolic links to the latest libraries.
+ The ICU docs package
The ICU docs package is named `libicu-doc'. It contains the files
generated by doxygen when the `make doc' command is executed, in a
location appropriate for the system that the package is installed on.
+ The ICU data package
The ICU data package is named `icu-data'. It contains source files for
the data found in the ICU package. These files are installed in
$(ICUDATADIR).
+ The ICU international data package
The ICU international data package is named `icu-i18ndata'. It contains source files for
the data found in the ICU locales package. These files are installed in
$(ICUDATADIR).
----
Yves Arrouye <yves@realnames.com>

13
intl/icu/packaging/README Normal file
Просмотреть файл

@ -0,0 +1,13 @@
Copyright (C) 2000-2003, International Business Machines
Corporation and others. All Rights Reserved.
This directory contains information, input files and scripts for
packaging ICU using specific packaging tools. We assume that the
packager is familiar with the tools and procedures needed to build a
package for a given packaging method (for example, how to use
dpkg-buildpackage(1) on Debian GNU/Linux, or rpm(8) on distributions that
use RPM packages).
Please read the file PACKAGES if you are interested in packaging ICU
yourself. It describes what the different packages should be, and what
their contents are.

Просмотреть файл

@ -0,0 +1,228 @@
# Copyright (C) 2000-2005, International Business Machines
# Corporation and others. All Rights Reserved.
#
# RPM specification file for ICU.
#
# Neal Probert <nprobert@walid.com> is the current maintainer.
# Yves Arrouye <yves@realnames.com> is the original author.
# This file can be freely redistributed under the same license as ICU.
Name: icu
Version: 3.4
Release: 1
Requires: libicu34 >= %{version}
Summary: International Components for Unicode
Packager: Ian Holsman (CNET Networks) <ianh@cnet.com>
Copyright: X License
Group: System Environment/Libraries
Source: icu-%{version}.tgz
BuildRoot: /var/tmp/%{name}-%{version}
%description
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode and locale support. The library provides calendar support, conversions
for many character sets, language sensitive collation, date
and time formatting, support for many locales, message catalogs
and resources, message formatting, normalization, number and currency
formatting, time zones support, transliteration, word, line and
sentence breaking, etc.
This package contains the Unicode character database and derived
properties, along with converters and time zones data.
This package contains the runtime libraries for ICU. It does
not contain any of the data files needed at runtime and present in the
`icu' and `icu-locales` packages.
%package -n libicu34
Summary: International Components for Unicode (libraries)
Group: Development/Libraries
%description -n libicu34
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode support. This package contains the runtime libraries for ICU. It does
not contain any of the data files needed at runtime and present in the
`icu' and `icu-locales` packages.
%package -n libicu-devel
Summary: International Components for Unicode (development files)
Group: Development/Libraries
Requires: libicu34 = %{version}
%description -n libicu-devel
ICU is a set of C and C++ libraries that provides robust and full-featured
Unicode support. This package contains the development files for ICU.
%package locales
Summary: Locale data for ICU
Group: System Environment/Libraries
Requires: libicu34 >= %{version}
%description locales
The locale data are used by ICU to provide localization (l10n),
internationalization (i18n) and timezone support to ICU applications.
This package also contains break data for various languages,
and transliteration data.
%post
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
#ICU_DATA=/usr/share/icu/%{version}
#export ICU_DATA
%preun
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%post -n libicu34
ldconfig
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%preun -n libicu34
# Adjust the current ICU link in /usr/lib/icu
icucurrent=`2>/dev/null ls -dp /usr/lib/icu/* | sed -n -e '/\/%{version}\//d' -e 's,.*/\([^/]*\)/$,\1,p'| sort -rn | head -1`
cd /usr/lib/icu
rm -f /usr/lib/icu/current
if test x"$icucurrent" != x
then
ln -s "$icucurrent" current
fi
%prep
%setup -q -n icu
%build
cd source
chmod a+x ./configure
CFLAGS="-O3" CXXFLAGS="-O" ./configure --prefix=/usr --sysconfdir=/etc --with-data-packaging=files --enable-shared --enable-static --disable-samples
echo 'CPPFLAGS += -DICU_DATA_DIR=\"/usr/share/icu/%{version}\"' >> icudefs.mk
make RPM_OPT_FLAGS="$RPM_OPT_FLAGS"
%install
rm -rf $RPM_BUILD_ROOT
cd source
make install DESTDIR=$RPM_BUILD_ROOT
%files
%defattr(-,root,root)
%doc readme.html
%doc license.html
/usr/share/icu/%{version}/license.html
/usr/share/icu/%{version}/icudt34l/*.cnv
/usr/share/icu/%{version}/icudt34l/*.icu
/usr/share/icu/%{version}/icudt34l/*.spp
/usr/bin/derb
/usr/bin/genbrk
/usr/bin/gencnval
/usr/bin/genrb
/usr/bin/icu-config
/usr/bin/makeconv
/usr/bin/pkgdata
/usr/bin/uconv
/usr/sbin/decmn
/usr/sbin/genccode
/usr/sbin/gencmn
/usr/sbin/gensprep
/usr/sbin/genuca
/usr/sbin/icuswap
/usr/share/icu/%{version}/mkinstalldirs
/usr/man/man1/derb.1.*
/usr/man/man1/gencnval.1.*
/usr/man/man1/genrb.1.*
/usr/man/man1/icu-config.1.*
/usr/man/man1/makeconv.1.*
/usr/man/man1/pkgdata.1.*
/usr/man/man1/uconv.1.*
/usr/man/man8/decmn.8.*
/usr/man/man8/genccode.8.*
/usr/man/man8/gencmn.8.*
/usr/man/man8/gensprep.8.*
/usr/man/man8/genuca.8.*
%files -n icu-locales
/usr/share/icu/%{version}/icudt34l/*.brk
/usr/share/icu/%{version}/icudt34l/*.res
/usr/share/icu/%{version}/icudt34l/coll/*.res
/usr/share/icu/%{version}/icudt34l/rbnf/*.res
/usr/share/icu/%{version}/icudt34l/translit/*.res
%files -n libicu34
%doc license.html
/usr/lib/libicui18n.so.34
/usr/lib/libicui18n.so.34.0
/usr/lib/libicutu.so.34
/usr/lib/libicutu.so.34.0
/usr/lib/libicuuc.so.34
/usr/lib/libicuuc.so.34.0
/usr/lib/libicudata.so.34
/usr/lib/libicudata.so.34.0
/usr/lib/libicuio.so.34
/usr/lib/libicuio.so.34.0
/usr/lib/libiculx.so.34
/usr/lib/libiculx.so.34.0
/usr/lib/libicule.so.34
/usr/lib/libicule.so.34.0
%files -n libicu-devel
%doc readme.html
%doc license.html
/usr/lib/libicui18n.so
/usr/lib/libsicui18n.a
/usr/lib/libicuuc.so
/usr/lib/libsicuuc.a
/usr/lib/libicutu.so
/usr/lib/libsicutu.a
/usr/lib/libicuio.so
/usr/lib/libsicuio.a
/usr/lib/libicudata.so
/usr/lib/libsicudata.a
/usr/lib/libicule.so
/usr/lib/libsicule.a
/usr/lib/libiculx.so
/usr/lib/libsiculx.a
/usr/include/unicode/*.h
/usr/include/layout/*.h
/usr/lib/icu/%{version}/Makefile.inc
/usr/lib/icu/Makefile.inc
/usr/share/icu/%{version}/config
/usr/share/doc/icu-%{version}/*
%changelog
* Mon Jun 07 2004 Alexei Dets <adets@idsk.com>
- update to 3.0
* Tue Aug 16 2003 Steven Loomis <srl@jtcsv.com>
- update to 2.6.1 - include license
* Thu Jun 05 2003 Steven Loomis <srl@jtcsv.com>
- Update to 2.6
* Fri Dec 27 2002 Steven Loomis <srl@jtcsv.com>
- Update to 2.4 spec
* Fri Sep 27 2002 Steven Loomis <srl@jtcsv.com>
- minor updates to 2.2 spec. Rpath is off by default, don't pass it as an option.
* Mon Sep 16 2002 Ian Holsman <ian@holsman.net>
- update to icu 2.2

1773
intl/icu/readme.html Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

233
intl/icu/source/Doxyfile.in Normal file
Просмотреть файл

@ -0,0 +1,233 @@
# Doxyfile 1.3.7
# ********************************************************************
# * COPYRIGHT:
# * Copyright (c) 2004-2012, International Business Machines Corporation
# * and others. All Rights Reserved.
# ********************************************************************
#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = "ICU @VERSION@"
PROJECT_NUMBER = @VERSION@
OUTPUT_DIRECTORY = doc
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
#USE_WINDOWS_ENCODING = YES
DOXYFILE_ENCODING = UTF-8
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ABBREVIATE_BRIEF =
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
STRIP_FROM_INC_PATH =
SHORT_NAMES = NO
JAVADOC_AUTOBRIEF = YES
MULTILINE_CPP_IS_BRIEF = NO
#DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
DISTRIBUTE_GROUP_DOC = YES
TAB_SIZE = 8
ALIASES = "memo=\par Note:\n" \
"draft=\xrefitem draft \"Draft\" \"Draft List\" This API may be changed in the future versions and was introduced in" \
"stable=\xrefitem stable \"Stable\" \"Stable List\"" \
"deprecated=\xrefitem deprecated \"Deprecated\" \"Deprecated List\"" \
"obsolete=\xrefitem obsolete \"Obsolete\" \"Obsolete List\"" \
"system=\xrefitem system \"System\" \"System List\" \n Do not use unless you know what you are doing." \
"internal=\xrefitem internal \"Internal\" \"Internal List\" Do not use. This API is for internal use only."
OPTIMIZE_OUTPUT_FOR_C = YES
OPTIMIZE_OUTPUT_JAVA = NO
SUBGROUPING = YES
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
EXTRACT_LOCAL_METHODS = NO
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
INTERNAL_DOCS = YES
CASE_SENSE_NAMES = YES
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
SORT_BRIEF_DOCS = NO
SORT_BY_SCOPE_NAME = NO
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
# docset
GENERATE_DOCSET = NO
DOCSET_FEEDNAME = "ICU @VERSION@"
DOCSET_BUNDLE_ID = org.icu-project.icu4c
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = @srcdir@/common/unicode @srcdir@/i18n/unicode @srcdir@/io/unicode @srcdir@/layout/LEFontInstance.h @srcdir@/layout/LEGlyphStorage.h @srcdir@/layout/LELanguages.h @srcdir@/layout/LEScripts.h @srcdir@/layout/LESwaps.h @srcdir@/layout/LETypes.h @srcdir@/layout/LayoutEngine.h @srcdir@/layoutex/layout
FILE_PATTERNS = *.h
RECURSIVE = NO
EXCLUDE = @srcdir@/common/unicode/urename.h @srcdir@/common/unicode/udraft.h @srcdir@/common/unicode/udeprctd.h @srcdir@/common/unicode/uobslete.h @srcdir@/common/unicode/ppalmos.h
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = config*.h
EXAMPLE_PATH = @srcdir@/
EXAMPLE_PATTERNS =
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = YES
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = YES
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = NO
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = NO
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
XML_PROGRAMLISTING = YES
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = YES
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_HAVE_STD_STRING=1 U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE = "@builddir@/doc/html/icudocs.tag"
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
CALLER_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
#DOT_FONTNAME = FreeSans
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 0
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO

382
intl/icu/source/Makefile.in Normal file
Просмотреть файл

@ -0,0 +1,382 @@
#******************************************************************************
#
# Copyright (C) 1998-2012, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
## Top-level Makefile.in for ICU
## Stephen F. Booth
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = .
include $(top_builddir)/icudefs.mk
docdir = $(datadir)/doc
docsubdir = $(PACKAGE)$(ICULIBDASHSUFFIX)/html
docfilesdir = doc/html
docfiles = $(docfilesdir)/*.png $(docfilesdir)/*.html $(docfilesdir)/*.css $(docfilesdir)/*.tag
docsrchdir = $(docfilesdir)/search
docsrchfiles = $(docsrchdir)/*
##
## Build directory information
subdir = .
#AUTOCONF = @AUTOCONF@
## Optional directory setup
@LAYOUT_TRUE@LAYOUT = layout layoutex
@ICUIO_TRUE@ICUIO = io
@EXTRAS_TRUE@EXTRA = extra
@TESTS_TRUE@TEST = test
@SAMPLES_TRUE@SAMPLE = samples
## pkgconfig setup. Always have uc and i18n. Others are optional.
ALL_PKGCONFIG_SUFFIX=uc i18n
@LAYOUT_TRUE@ALL_PKGCONFIG_SUFFIX+= le lx
@ICUIO_TRUE@ALL_PKGCONFIG_SUFFIX+= io
DOXYGEN = @DOXYGEN@
DOCZIP = icu-docs.zip
## Files to remove for 'make clean'
CLEANFILES = *~
ALL_PKGCONFIG_FILES=$(ALL_PKGCONFIG_SUFFIX:%=$(top_builddir)/config/icu-%.pc)
## Files built (autoconfed) and installed
INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config @platform_make_fragment@ $(EXTRA_DATA:%=$(DESTDIR)$(pkglibdir)/%) $(ALL_PKGCONFIG_FILES)
## Files built (autoconfed) but not installed
LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk
DOCDIRS = common i18n
SUBDIRS = stubdata common i18n $(LAYOUT) tools data $(ICUIO) $(EXTRA) $(SAMPLE) $(TEST)
SECTION = 1
MANX_FILES = config/icu-config.$(SECTION)
ALL_MAN_FILES = $(MANX_FILES)
## Extra files to install [nothing at present]
EXTRA_DATA =
## List of phony targets
.PHONY : all all-local all-recursive install install-local install-udata install-udata-files install-udata-dlls \
install-recursive clean clean-local clean-recursive distclean \
distclean-local distclean-recursive doc dist dist-local dist-recursive \
check check-local check-recursive clean-recursive-with-twist install-icu \
doc install-doc tests icu4j-data icu4j-data-install update-windows-makefiles xcheck-local xcheck-recursive xperf xcheck xperf-recursive \
check-exhaustive check-exhaustive-local check-exhaustive-recursive releaseDist
## Clear suffix list
.SUFFIXES :
## List of standard targets
all: all-local all-recursive
install: install-recursive install-local
clean: clean-recursive-with-twist clean-local
distclean : distclean-recursive distclean-local
dist: dist-recursive dist-local
check: all check-recursive
check-recursive: all
xcheck: all xcheck-recursive
xperf: all xperf-recursive
check-exhaustive: all check-exhaustive-recursive
check-exhaustive-local: check-local
xcheck-recursive: all xcheck-local
@$(MAKE) -C test xcheck
xperf-recursive: all tests
@$(MAKE) -C test/perf xperf
$(top_builddir)/config/icuinfo.xml: all
@$(MAKE) -C tools/icuinfo check
ifeq ($(DOXYGEN),)
doc doc-searchengine:
@echo you need Doxygen to generate documentation. Doxygen can be found on the Web
@echo at http://www.doxygen.org/
else
doc: doc/html/index.html
doc-searchengine: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/common/unicode/*.h $(srcdir)/i18n/unicode/*.h $(srcdir)/layout/unicode/*.h $(srcdir)/io/unicode/*.h)
sed < Doxyfile -e 's%[^#]*SEARCHENGINE.*%SEARCHENGINE=YES%' | $(DOXYGEN) -
@echo adding links from non-namespaced class files
find doc/html -name 'classicu_1_1*' -print | sed -e 's%^\(.*class\)icu_1_1\(.*\)$$%ln & \1\2%' | sh
@echo Docs created - WARNING, probably contains non-GPL .js files
doc/html/index.html: Doxyfile $(wildcard ./common/unicode/platform.h $(srcdir)/common/unicode/*.h $(srcdir)/i18n/unicode/*.h $(srcdir)/layout/unicode/*.h $(srcdir)/io/unicode/*.h)
$(DOXYGEN)
@echo adding links from non-namespaced class files
find doc/html -name 'classicu_1_1*' -print | sed -e 's%^\(.*class\)icu_1_1\(.*\)$$%ln & \1\2%' | sh
Doxyfile: $(srcdir)/Doxyfile.in
CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(DOCZIP): doc
-$(RMV) $(DOCZIP)
( cd doc/html ; zip -r ../../$(DOCZIP) * )
endif
LOCAL_SUBDIRS = $(SUBDIRS)
CLEAN_FIRST_SUBDIRS = tools
$(LIBDIR) $(BINDIR):
-$(MKINSTALLDIRS) $@
## Recursive targets
all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive check-exhaustive-recursive: $(LIBDIR) $(BINDIR)
@dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
list='$(LOCAL_SUBDIRS)'; for subdir in $$list; do \
echo "$(MAKE)[$(MAKELEVEL)]: Making \`$$target' in \`$$subdir'"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-local"; \
else \
local_target="$$target"; \
fi; \
(cd $$subdir && $(MAKE) RECURSIVE=YES $$local_target) || exit; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) "$$target-local" || exit; \
fi
clean-recursive-with-twist:
$(MAKE) clean-recursive LOCAL_SUBDIRS='$(CLEAN_FIRST_SUBDIRS) $(filter-out $(CLEAN_FIRST_SUBDIRS),$(LOCAL_SUBDIRS))'
all-local: $(srcdir)/configure $(LOCAL_BUILT_FILES) $(INSTALLED_BUILT_FILES)
ifndef VERBOSE
@echo "Note: rebuild with \"$(MAKE) VERBOSE=1 $(MAKECMDGOALS)\" to show all compiler parameters."
endif
install-local: install-icu install-manx
install-icu: $(INSTALLED_BUILT_FILES)
@$(MKINSTALLDIRS) $(DESTDIR)$(pkgdatadir)/config
@$(MKINSTALLDIRS) $(DESTDIR)$(pkglibdir)
@$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
@$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
$(INSTALL_DATA) @platform_make_fragment@ $(DESTDIR)$(pkgdatadir)/config/@platform_make_fragment_name@
$(INSTALL_SCRIPT) $(top_srcdir)/mkinstalldirs $(DESTDIR)$(pkgdatadir)/mkinstalldirs
$(INSTALL_SCRIPT) $(top_srcdir)/install-sh $(DESTDIR)$(pkgdatadir)/install-sh
@$(MKINSTALLDIRS) $(DESTDIR)$(libdir)/pkgconfig
$(INSTALL_DATA) $(ALL_PKGCONFIG_FILES) $(DESTDIR)$(libdir)/pkgconfig/
$(INSTALL_DATA) $(top_srcdir)/../license.html $(DESTDIR)$(pkgdatadir)/license.html
$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
$(INSTALL_DATA) $(top_builddir)/config/pkgdata.inc $(DESTDIR)$(pkglibdir)/pkgdata.inc
# @echo icuinfo.xml is built after make check.
# -$(INSTALL_DATA) $(top_builddir)/config/icuinfo.xml $(DESTDIR)$(pkglibdir)/icuinfo.xml
cd $(DESTDIR)$(pkglibdir)/..; \
$(RM) current && ln -s $(VERSION) current; \
$(RM) Makefile.inc && ln -s current/Makefile.inc Makefile.inc; \
$(RM) pkgdata.inc && ln -s current/pkgdata.inc pkgdata.inc
ifeq ($(DOXYGEN),)
install-doc:
else
install-doc: doc
$(RM) -r $(DESTDIR)$(docdir)/$(docsubdir)
$(MKINSTALLDIRS) $(DESTDIR)$(docdir)/$(docsubdir)
$(INSTALL_DATA) $(docfiles) $(DESTDIR)$(docdir)/$(docsubdir)
endif
$(DESTDIR)$(pkglibdir)/%: $(top_srcdir)/../data/%
$(INSTALL_DATA) $< $@
# Build the tests, but don't run them.
tests: all
$(MAKE) -C $(top_builddir)/test
# Remove the CLEANFILES patterns, the test-*.xml and perf-*.xml files, the
# generated pkg-config files, config/icuinfo.xml and the generated docs.
clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
# The two patterns below are deliberately unquoted so that the shell expands
# them; when quoted they only match a file literally named with the asterisk.
# The leading "-" keeps make going when nothing matches.
	-$(RMV) test-*.xml
	-$(RMV) perf-*.xml
	-$(RMV) $(ALL_PKGCONFIG_FILES) $(top_builddir)/config/icuinfo.xml
	$(RMV) Doxyfile doc $(DOCZIP)
distclean-local: clean-local
$(RMV) $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc $(top_builddir)/config/icu-config $(top_builddir)/config/icu.pc $(ALL_PKGCONFIG_FILES)
$(RMV) config.cache config.log config.status $(top_builddir)/config/icucross.mk autom4te.cache uconfig.h.prepend
$(RMV) Makefile config/Makefile icudefs.mk $(LIBDIR) $(BINDIR)
-$(RMV) dist
check-local: xcheck-local
-$(RMV) test-local.xml
xcheck-local: $(top_builddir)/config/icu-config $(top_builddir)/config/Makefile.inc $(top_builddir)/config/pkgdata.inc
@echo verifying that icu-config --selfcheck can operate
@test "passed" = "$(shell $(top_builddir)/config/icu-config --selfcheck 2>&1)" || (echo "FAIL: icu-config could not run properly." ; exit 1)
@echo verifying that $(MAKE) -f Makefile.inc selfcheck can operate
@test "passed" = "$(shell $(MAKE) --no-print-directory -f $(top_builddir)/config/Makefile.inc SELFCHECK=1 selfcheck)" || (echo "FAIL: Makefile.inc could not run properly." ; exit 1 )
@echo "PASS: config selfcheck OK"
#$(srcdir)/configure : $(srcdir)/configure.in $(top_srcdir)/aclocal.m4
# cd $(srcdir) && $(AUTOCONF)
icudefs.mk: $(srcdir)/icudefs.mk.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
config/icucross.mk: $(top_builddir)/icudefs.mk $(top_builddir)/Makefile
@echo rebuilding $@
@(echo "CROSS_ICU_VERSION=$(VERSION)" ;\
echo "TOOLEXEEXT=$(EXEEXT)" \
) > $@
@(echo 'TOOLBINDIR=$$(cross_buildroot)/bin' ;\
echo 'TOOLLIBDIR=$$(cross_buildroot)/lib' ;\
echo "INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(TOOLLIBDIR):$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$$$'"$(LDLIBRARYPATH_ENVVAR)" ;\
echo "PKGDATA_INVOKE=$(LDLIBRARYPATH_ENVVAR)=$(LIBRARY_PATH_PREFIX)"'$$(cross_buildroot)/stubdata:$$(cross_buildroot)/tools/ctestfw:$$(TOOLLIBDIR):$$$$'"$(LDLIBRARYPATH_ENVVAR) " ;\
echo ) >> $@
config/icu.pc: $(srcdir)/config/icu.pc.in
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
config/icu-uc.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Common and Data libraries" >> $@
@echo "Name: $(PACKAGE)-uc" >> $@
@echo "Libs:" '-L$${libdir}' "${ICULIBS_UC}" "${ICULIBS_DT}" >> $@
@echo "Libs.private:" '$${baselibs}' >> $@
@echo $@ updated.
config/icu-i18n.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Internationalization library" >> $@
@echo "Name: $(PACKAGE)-i18n" >> $@
@echo "Requires: icu-uc" >> $@
@echo "Libs:" "${ICULIBS_I18N}" >> $@
@echo $@ updated.
config/icu-io.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Stream and I/O Library" >> $@
@echo "Name: $(PACKAGE)-io" >> $@
@echo "Requires: icu-i18n" >> $@
@echo "Libs:" "${ICULIBS_IO}" >> $@
@echo $@ updated.
config/icu-le.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Layout library" >> $@
@echo "Name: $(PACKAGE)-le" >> $@
@echo "Requires: icu-uc" >> $@
@echo "Libs:" "${ICULIBS_LE}" >> $@
@echo $@ updated.
config/icu-lx.pc: config/icu.pc Makefile icudefs.mk
@cat config/icu.pc > $@
@echo "Description: $(PACKAGE_ICU_DESCRIPTION): Paragraph Layout library" >> $@
@echo "Name: $(PACKAGE)-lx" >> $@
@echo "Requires: icu-le" >> $@
@echo "Libs:" "${ICULIBS_LX}" >> $@
@echo $@ updated.
Makefile: $(srcdir)/Makefile.in icudefs.mk $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(top_builddir)/config/Makefile.inc: $(srcdir)/config/Makefile.inc.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(top_builddir)/config/pkgdata.inc: icudefs.mk $(top_builddir)/config/pkgdataMakefile
cd $(top_builddir)/config; \
$(MAKE) -f pkgdataMakefile
$(top_builddir)/config/pkgdataMakefile:
cd $(top_builddir) \
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(top_builddir)/config/icu-config: $(top_builddir)/Makefile $(top_srcdir)/config/icu-config-top $(top_srcdir)/config/icu-config-bottom $(top_builddir)/config/Makefile.inc @platform_make_fragment@ $(top_srcdir)/config/make2sh.sed
-$(RMV) $@
$(INSTALL_SCRIPT) $(top_srcdir)/config/icu-config-top $@
chmod u+w $@
@echo "# Following from @platform_make_fragment@" >> $@
LC_ALL=C sed -f $(top_srcdir)/config/make2sh.sed < $(top_builddir)/config/Makefile.inc | grep -v '#M#' | uniq >> $@
LC_ALL=C sed -f $(top_srcdir)/config/make2sh.sed < @platform_make_fragment@ | grep -v '#M#' | uniq >> $@
cat $(top_srcdir)/config/icu-config-bottom >> $@
echo "# Rebuilt on "`date` >> $@
chmod u-w $@
config.status: $(srcdir)/configure $(srcdir)/common/unicode/uvernum.h
@echo
@echo
@echo "*** config.status has become stale ***"
@echo " 'configure' and/or 'uvernum.h' have changed, please"
@echo " do 'runConfigureICU' (or 'configure') again, as per"
@echo " the readme.html."
@echo
@echo
exit 1
install-manx: $(MANX_FILES)
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
icu4j-data-install icu4j-data: all tests
@echo ICU4J_ROOT=$(ICU4J_ROOT)
@$(MAKE) -C test/testdata $@
@$(MAKE) -C data $@
# For updating Windows makefiles
WINDOWS_UPDATEFILES=$(srcdir)/data/makedata.mak $(shell find $(srcdir) -name '*.vcproj' -o -name '*.vcxproj')
WINDOWS_UPDATEFILES_SED=config/windows-update.sed
update-windows-makefiles: config.status
@echo Updating Windows Makefiles for ICU $(VERSION)
CONFIG_FILES=$(WINDOWS_UPDATEFILES_SED) CONFIG_HEADERS= $(SHELL) ./config.status
@for file in $(WINDOWS_UPDATEFILES); do \
echo "Updating $$file"; \
mv "$${file}" "$${file}.bak" && \
sed -f $(WINDOWS_UPDATEFILES_SED) < "$${file}.bak" > "$${file}" && \
rm "$${file}.bak"; \
done;
$(RMV) $(WINDOWS_UPDATEFILES_SED)
@echo Please check over the changes carefully before checking them in.
# For building a source distribution.
distcheck dist-local:
$(MAKE) -C . -f $(top_srcdir)/config/dist.mk srcdir="$(srcdir)" top_srcdir="$(top_srcdir)" $@
# releaseDist: run "install" and then record the ICU version, host and
# compilers reported by icu-config in $(DESTDIR)/readme.txt.
# Without DESTDIR the target only prints a reminder.
# printf is used instead of "echo -n" because the handling of -n by echo
# differs between shells, and some would write a literal "-n" into the file.
ifeq ($(DESTDIR),)
releaseDist:
	@echo "Please provide DESTDIR when calling the target releaseDist."
else
releaseDist: install
	@printf 'ICU Version: %s\n' "`./config/icu-config --noverify --version`" > $(DESTDIR)/readme.txt
	@printf 'HOST: %s\n' "`./config/icu-config --noverify --host`" >> $(DESTDIR)/readme.txt
	@printf 'CC Compiler: %s\n' "`./config/icu-config --noverify --cc`" >> $(DESTDIR)/readme.txt
	@printf 'CXX Compiler: %s\n' "`./config/icu-config --noverify --cxx`" >> $(DESTDIR)/readme.txt
endif
check-installed-icu: install
@echo "Testing ICU installed in $(prefix)"
$(INSTALLED_INVOKE) $(bindir)/icuinfo$(EXEEXT)
$(INSTALLED_INVOKE) $(bindir)/uconv$(EXEEXT) -V
$(INSTALLED_INVOKE) $(bindir)/genrb$(EXEEXT) -V
$(INSTALLED_INVOKE) $(bindir)/gencnval$(EXEEXT) -h
@echo INSTALLED ICU IN "$(prefix)" OK!

482
intl/icu/source/aclocal.m4 поставляемый Normal file
Просмотреть файл

@ -0,0 +1,482 @@
# aclocal.m4 for ICU
# Copyright (c) 1999-2012, International Business Machines Corporation and
# others. All Rights Reserved.
# Stephen F. Booth
# @TOP@
# ICU_CHECK_MH_FRAG
# Selects the name of the platform Makefile fragment for the configured host.
# ${host} is matched against the patterns below and the chosen mh-* name is
# stored in the cache variable icu_cv_host_frag. Hosts that match no pattern
# get mh-unknown. Where a platform has compiler-specific fragments, $GCC
# decides between them. On Cygwin/MinGW with GCC a test compile that requires
# __MINGW32__ picks mh-mingw or mh-cygwin.
AC_DEFUN(ICU_CHECK_MH_FRAG, [
AC_CACHE_CHECK(
[which Makefile fragment to use for ${host}],
[icu_cv_host_frag],
[
case "${host}" in
*-*-solaris*)
if test "$GCC" = yes; then
icu_cv_host_frag=mh-solaris-gcc
else
icu_cv_host_frag=mh-solaris
fi ;;
alpha*-*-linux-gnu)
if test "$GCC" = yes; then
icu_cv_host_frag=mh-alpha-linux-gcc
else
icu_cv_host_frag=mh-alpha-linux-cc
fi ;;
powerpc*-*-linux*)
if test "$GCC" = yes; then
icu_cv_host_frag=mh-linux
else
icu_cv_host_frag=mh-linux-va
fi ;;
# Generic Linux and GNU hosts. The alpha and powerpc Linux arms above are
# listed first and therefore take precedence for the hosts they match.
*-*-linux*|*-*-gnu|*-*-k*bsd*-gnu|*-*-kopensolaris*-gnu) icu_cv_host_frag=mh-linux ;;
*-*-cygwin|*-*-mingw32)
if test "$GCC" = yes; then
AC_TRY_COMPILE([
#ifndef __MINGW32__
#error This is not MinGW
#endif], [], icu_cv_host_frag=mh-mingw, icu_cv_host_frag=mh-cygwin)
else
icu_cv_host_frag=mh-cygwin-msvc
fi ;;
*-*-*bsd*|*-*-dragonfly*) icu_cv_host_frag=mh-bsd-gcc ;;
*-*-aix*)
if test "$GCC" = yes; then
icu_cv_host_frag=mh-aix-gcc
else
icu_cv_host_frag=mh-aix-va
fi ;;
*-*-hpux*)
if test "$GCC" = yes; then
icu_cv_host_frag=mh-hpux-gcc
else
# NOTE(review): no fragment is assigned in this branch when $CXX does not
# match *aCC - confirm that leaving icu_cv_host_frag unset is intended.
case "$CXX" in
*aCC) icu_cv_host_frag=mh-hpux-acc ;;
esac
fi ;;
*-*ibm-openedition*|*-*-os390*) icu_cv_host_frag=mh-os390 ;;
*-*-os400*) icu_cv_host_frag=mh-os400 ;;
*-apple-rhapsody*) icu_cv_host_frag=mh-darwin ;;
*-apple-darwin*) icu_cv_host_frag=mh-darwin ;;
*-*-beos) icu_cv_host_frag=mh-beos ;;
*-*-haiku) icu_cv_host_frag=mh-haiku ;;
*-*-irix*) icu_cv_host_frag=mh-irix ;;
*-dec-osf*) icu_cv_host_frag=mh-alpha-osf ;;
*-*-nto*) icu_cv_host_frag=mh-qnx ;;
*-ncr-*) icu_cv_host_frag=mh-mpras ;;
*) icu_cv_host_frag=mh-unknown ;;
esac
]
)
])
# ICU_CONDITIONAL - similar example taken from Automake 1.4
# Usage: ICU_CONDITIONAL(NAME, SHELL-COMMAND)
# Registers NAME_TRUE as an output substitution. It is set to the empty
# string when SHELL-COMMAND succeeds and to '#' otherwise, so a template line
# written as @NAME_TRUE@... turns into a comment when the command fails.
AC_DEFUN(ICU_CONDITIONAL,
[AC_SUBST($1_TRUE)
if $2; then
$1_TRUE=
else
$1_TRUE='#'
fi])
# ICU_PROG_LINK - Make sure that the linker is usable
# Only acts on Cygwin and MinGW hosts when the compiler is not GCC: if the
# "link" command found on PATH reports itself as GNU coreutils, configure
# stops with an error telling the user to correct PATH. Other hosts are
# left untouched.
AC_DEFUN(ICU_PROG_LINK,
[
case "${host}" in
*-*-cygwin*|*-*-mingw*)
if test "$GCC" != yes && test -n "`link --version 2>&1 | grep 'GNU coreutils'`"; then
AC_MSG_ERROR([link.exe is not a valid linker. Your PATH is incorrect.
Please follow the directions in ICU's readme.])
fi;;
*);;
esac])
# AC_SEARCH_LIBS_FIRST(FUNCTION, SEARCH-LIBS [, ACTION-IF-FOUND
# [, ACTION-IF-NOT-FOUND [, OTHER-LIBRARIES]]])
# Search for a library defining FUNCTION, trying the libraries first.
# Each library in SEARCH-LIBS is tried in order, linked together with
# OTHER-LIBRARIES and the current LIBS; the first one that links wins.
# Only when none of them works is a link without any extra library
# attempted, which yields the result "none required".
# The result is cached in ac_cv_search_FUNCTION. On success the found
# -l option is prepended to LIBS, unless none is required, and
# ACTION-IF-FOUND runs; otherwise ACTION-IF-NOT-FOUND runs.
AC_DEFUN(AC_SEARCH_LIBS_FIRST,
[AC_PREREQ([2.13])
AC_CACHE_CHECK([for library containing $1], [ac_cv_search_$1],
[ac_func_search_save_LIBS="$LIBS"
ac_cv_search_$1="no"
for i in $2; do
LIBS="-l$i $5 $ac_func_search_save_LIBS"
AC_TRY_LINK_FUNC([$1],
[ac_cv_search_$1="-l$i"
break])
done
if test "$ac_cv_search_$1" = "no"; then
AC_TRY_LINK_FUNC([$1], [ac_cv_search_$1="none required"])
fi
LIBS="$ac_func_search_save_LIBS"])
if test "$ac_cv_search_$1" != "no"; then
test "$ac_cv_search_$1" = "none required" || LIBS="$ac_cv_search_$1 $LIBS"
$3
else :
$4
fi])
# Check if we can build and use 64-bit libraries
AC_DEFUN(AC_CHECK_64BIT_LIBS,
[
BITS_REQ=nochange
ENABLE_64BIT_LIBS=unknown
## revisit this for cross-compile.
AC_ARG_ENABLE(64bit-libs,
[ --enable-64bit-libs (deprecated, use --with-library-bits) build 64-bit libraries [default= platform default]],
[echo "note, use --with-library-bits instead of --*-64bit-libs"
case "${enableval}" in
no|false|32) with_library_bits=32; ;;
yes|true|64) with_library_bits=64else32 ;;
nochange) with_library_bits=nochange; ;;
*) AC_MSG_ERROR(bad value ${enableval} for '--*-64bit-libs') ;;
esac] )
AC_ARG_WITH(library-bits,
[ --with-library-bits=bits specify how many bits to use for the library (32, 64, 64else32, nochange) [default=nochange]],
[case "${withval}" in
""|nochange) BITS_REQ=$withval ;;
32|64|64else32) BITS_REQ=$withval ;;
*) AC_MSG_ERROR(bad value ${withval} for --with-library-bits) ;;
esac])
# don't use these for cross compiling
if test "$cross_compiling" = "yes" -a "${BITS_REQ}" != "nochange"; then
AC_MSG_ERROR([Don't specify bitness when cross compiling. See readme.html for help with cross compilation., and set compiler options manually.])
fi
AC_CHECK_SIZEOF([void *])
AC_MSG_CHECKING([whether runnable 64 bit binaries are built by default])
case $ac_cv_sizeof_void_p in
8) DEFAULT_64BIT=yes ;;
4) DEFAULT_64BIT=no ;;
*) DEFAULT_64BIT=unknown
esac
BITS_GOT=unknown
# 'OK' here means, we can exit any further checking, everything's copa
BITS_OK=yes
# do we need to check for buildable/runnable 32 or 64 bit?
BITS_CHECK_32=no
BITS_CHECK_64=no
# later, can we run the 32/64 bit binaries so made?
BITS_RUN_32=no
BITS_RUN_64=no
if test "$DEFAULT_64BIT" = "yes"; then
# we get 64 bits by default.
BITS_GOT=64
case "$BITS_REQ" in
32)
# need to look for 32 bit support.
BITS_CHECK_32=yes
# not copa.
BITS_OK=no;;
# everyone else is happy.
nochange) ;;
*) ;;
esac
elif test "$DEFAULT_64BIT" = "no"; then
# not 64 bit by default.
BITS_GOT=32
case "$BITS_REQ" in
64|64else32)
BITS_CHECK_64=yes
#BITS_CHECK_32=yes
BITS_OK=no;;
nochange) ;;
*) ;;
esac
elif test "$DEFAULT_64BIT" = "unknown"; then
# cross compiling.
BITS_GOT=unknown
case "$BITS_REQ" in
64|64else32) BITS_OK=no
BITS_CHECK_32=yes
BITS_CHECK_64=yes ;;
32) BITS_OK=no;;
nochange) ;;
*) ;;
esac
fi
AC_MSG_RESULT($DEFAULT_64BIT);
if test "$BITS_OK" != "yes"; then
# not copa. back these up.
CFLAGS_OLD="${CFLAGS}"
CXXFLAGS_OLD="${CXXFLAGS}"
LDFLAGS_OLD="${LDFLAGS}"
ARFLAGS_OLD="${ARFLAGS}"
CFLAGS_32="${CFLAGS}"
CXXFLAGS_32="${CXXFLAGS}"
LDFLAGS_32="${LDFLAGS}"
ARFLAGS_32="${ARFLAGS}"
CFLAGS_64="${CFLAGS}"
CXXFLAGS_64="${CXXFLAGS}"
LDFLAGS_64="${LDFLAGS}"
ARFLAGS_64="${ARFLAGS}"
CAN_BUILD_64=unknown
CAN_BUILD_32=unknown
# These results can't be cached because the checks set compiler flags.
if test "$BITS_CHECK_64" = "yes"; then
AC_MSG_CHECKING([how to build 64-bit executables])
CAN_BUILD_64=no
####
# Find out if we think we can *build* for 64 bit. Doesn't check whether we can run it.
# Note, we don't have to actually check if the options work- we'll try them before using them.
# So, only try actually testing the options, if you are trying to decide between multiple options.
# On exit from the following clauses:
# if CAN_BUILD_64=yes:
# *FLAGS are assumed to contain the right settings for 64bit
# else if CAN_BUILD_64=no: (default)
# *FLAGS are assumed to be trashed, and will be reset from *FLAGS_OLD
if test "$GCC" = yes; then
CFLAGS="${CFLAGS} -m64"
CXXFLAGS="${CXXFLAGS} -m64"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no)
else
case "${host}" in
sparc*-*-solaris*)
    # Solaris on SPARC, reached only when the compiler is not GCC.
    # On exit CAN_BUILD_64=yes means the flag variables hold 64-bit settings.
    # 1. try -m64
    CFLAGS="${CFLAGS} -m64"
    CXXFLAGS="${CXXFLAGS} -m64"
    AC_RUN_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
        CAN_BUILD_64=yes, CAN_BUILD_64=no, CAN_BUILD_64=unknown)
    if test "$CAN_BUILD_64" != yes; then
        # Nope. back out changes.
        # Each variable is restored from its own backup. Restoring CXXFLAGS
        # from CFLAGS_OLD would replace the C++ options with the C options
        # before step 2 appends to them.
        CFLAGS="${CFLAGS_OLD}"
        CXXFLAGS="${CXXFLAGS_OLD}"
        # 2. try xarch=v9 [deprecated]
        ## TODO: cross compile: the following won't work.
        SPARCV9=`isainfo -n 2>&1 | grep sparcv9`
        SOL64=`$CXX -xarch=v9 2>&1 && $CC -xarch=v9 2>&1 | grep -v usage:`
        # "Warning: -xarch=v9 is deprecated, use -m64 to create 64-bit programs"
        if test -z "$SOL64" && test -n "$SPARCV9"; then
            CFLAGS="${CFLAGS} -xtarget=ultra -xarch=v9"
            CXXFLAGS="${CXXFLAGS} -xtarget=ultra -xarch=v9"
            LDFLAGS="${LDFLAGS} -xtarget=ultra -xarch=v9"
            CAN_BUILD_64=yes
        fi
    fi
    ;;
i386-*-solaris*)
# 1. try -m64
CFLAGS="${CFLAGS} -m64"
CXXFLAGS="${CXXFLAGS} -m64"
AC_RUN_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no, CAN_BUILD_64=unknown)
if test "$CAN_BUILD_64" != yes; then
# Nope. back out changes.
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
# 2. try the older compiler option
## TODO: cross compile problem
AMD64=`isainfo -n 2>&1 | grep amd64`
SOL64=`$CXX -xtarget=generic64 2>&1 && $CC -xtarget=generic64 2>&1 | grep -v usage:`
if test -z "$SOL64" && test -n "$AMD64"; then
CFLAGS="${CFLAGS} -xtarget=generic64"
CXXFLAGS="${CXXFLAGS} -xtarget=generic64"
CAN_BUILD_64=yes
fi
fi
;;
ia64-*-linux*)
# check for ecc/ecpc compiler support
## TODO: cross compiler problem
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Intel`"; then
if test -n "`$CXX --help 2>&1 && $CC --help 2>&1 | grep -v Itanium`"; then
CAN_BUILD_64=yes
fi
fi
;;
*-*-cygwin)
# vcvarsamd64.bat should have been used to enable 64-bit builds.
# We only do this check to display the correct answer.
## TODO: cross compiler problem
if test -n "`$CXX -help 2>&1 | grep 'for x64'`"; then
CAN_BUILD_64=yes
fi
;;
*-*-aix*|powerpc64-*-linux*)
CFLAGS="${CFLAGS} -q64"
CXXFLAGS="${CXXFLAGS} -q64"
LDFLAGS="${LDFLAGS} -q64"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no)
if test "$CAN_BUILD_64" = yes; then
# worked- set other options.
case "${host}" in
*-*-aix*)
# tell AIX what executable mode to use.
ARFLAGS="${ARFLAGS} -X64"
esac
fi
;;
*-*-hpux*)
# First we try the newer +DD64, if that doesn't work,
# try other options.
CFLAGS="${CFLAGS} +DD64"
CXXFLAGS="${CXXFLAGS} +DD64"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no)
if test "$CAN_BUILD_64" != yes; then
# reset
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
# append
CFLAGS="${CFLAGS} +DA2.0W"
CXXFLAGS="${CXXFLAGS} +DA2.0W"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no)
fi
;;
*-*ibm-openedition*|*-*-os390*)
CFLAGS="${CFLAGS} -Wc,lp64"
CXXFLAGS="${CXXFLAGS} -Wc,lp64"
LDFLAGS="${LDFLAGS} -Wl,lp64"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
CAN_BUILD_64=yes, CAN_BUILD_64=no)
;;
*)
# unknown platform.
;;
esac
fi
AC_MSG_RESULT($CAN_BUILD_64)
if test "$CAN_BUILD_64" = yes; then
AC_MSG_CHECKING([whether runnable 64-bit binaries are being built ])
AC_RUN_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==64)?0:1;}])],
BITS_RUN_64=yes, BITS_RUN_64=no, BITS_RUN_64=unknown)
AC_MSG_RESULT($BITS_RUN_64);
CFLAGS_64="${CFLAGS}"
CXXFLAGS_64="${CXXFLAGS}"
LDFLAGS_64="${LDFLAGS}"
ARFLAGS_64="${ARFLAGS}"
fi
# put it back.
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
LDFLAGS="${LDFLAGS_OLD}"
ARFLAGS="${ARFLAGS_OLD}"
fi
if test "$BITS_CHECK_32" = "yes"; then
# see comment under 'if BITS_CHECK_64', above.
AC_MSG_CHECKING([how to build 32-bit executables])
if test "$GCC" = yes; then
CFLAGS="${CFLAGS} -m32"
CXXFLAGS="${CXXFLAGS} -m32"
AC_COMPILE_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==32)?0:1;}])],
CAN_BUILD_32=yes, CAN_BUILD_32=no)
fi
AC_MSG_RESULT($CAN_BUILD_32)
if test "$CAN_BUILD_32" = yes; then
AC_MSG_CHECKING([whether runnable 32-bit binaries are being built ])
AC_RUN_IFELSE([AC_LANG_SOURCE([int main(void) {return (sizeof(void*)*8==32)?0:1;}])],
BITS_RUN_32=yes, BITS_RUN_32=no, BITS_RUN_32=unknown)
AC_MSG_RESULT($BITS_RUN_32);
CFLAGS_32="${CFLAGS}"
CXXFLAGS_32="${CXXFLAGS}"
LDFLAGS_32="${LDFLAGS}"
ARFLAGS_32="${ARFLAGS}"
fi
# put it back.
CFLAGS="${CFLAGS_OLD}"
CXXFLAGS="${CXXFLAGS_OLD}"
LDFLAGS="${LDFLAGS_OLD}"
ARFLAGS="${ARFLAGS_OLD}"
fi
##
# OK. Now, we've tested for 32 and 64 bitness. Let's see what we'll do.
#
# First, implement 64else32
if test "$BITS_REQ" = "64else32"; then
if test "$BITS_RUN_64" = "yes"; then
BITS_REQ=64
else
# no changes.
BITS_OK=yes
fi
fi
# implement.
if test "$BITS_REQ" = "32" -a "$BITS_RUN_32" = "yes"; then
CFLAGS="${CFLAGS_32}"
CXXFLAGS="${CXXFLAGS_32}"
LDFLAGS="${LDFLAGS_32}"
ARFLAGS="${ARFLAGS_32}"
BITS_OK=yes
elif test "$BITS_REQ" = "64" -a "$BITS_RUN_64" = "yes"; then
CFLAGS="${CFLAGS_64}"
CXXFLAGS="${CXXFLAGS_64}"
LDFLAGS="${LDFLAGS_64}"
ARFLAGS="${ARFLAGS_64}"
BITS_OK=yes
elif test "$BITS_OK" != "yes"; then
AC_MSG_ERROR([Requested $BITS_REQ bit binaries but could not compile and execute them. See readme.html for help with cross compilation., and set compiler options manually.])
fi
fi
])
# Strict compilation options.
# AC_CHECK_STRICT_COMPILE adds the --enable-strict configure option, which
# is on unless --enable-strict=no (or --disable-strict) is given.
# When strict mode is on:
#   - with GCC as the C compiler, warning options and -std=c99 are appended
#     to CFLAGS;
#   - with G++ as the C++ compiler, warning options are appended to CXXFLAGS;
#   - otherwise, on *-*-cygwin hosts only, /W4 is appended when the output of
#     the compiler's /help starts with "Microsoft".
# Other host and compiler combinations are left unchanged.
AC_DEFUN(AC_CHECK_STRICT_COMPILE,
[
AC_MSG_CHECKING([whether strict compiling is on])
AC_ARG_ENABLE(strict,[ --enable-strict compile with strict compiler options [default=yes]], [
if test "$enableval" = no
then
ac_use_strict_options=no
else
ac_use_strict_options=yes
fi
], [ac_use_strict_options=yes])
AC_MSG_RESULT($ac_use_strict_options)
if test "$ac_use_strict_options" = yes
then
if test "$GCC" = yes
then
# Do not use -ansi. It limits us to C90, and it breaks some platforms.
# We use -std=c99 to disable the gnu99 defaults and its associated warnings
CFLAGS="$CFLAGS -Wall -std=c99 -pedantic -Wshadow -Wpointer-arith -Wmissing-prototypes -Wwrite-strings"
else
case "${host}" in
*-*-cygwin)
if test "`$CC /help 2>&1 | head -c9`" = "Microsoft"
then
CFLAGS="$CFLAGS /W4"
fi
esac
fi
if test "$GXX" = yes
then
CXXFLAGS="$CXXFLAGS -W -Wall -pedantic -Wpointer-arith -Wwrite-strings -Wno-long-long"
else
case "${host}" in
*-*-cygwin)
if test "`$CXX /help 2>&1 | head -c9`" = "Microsoft"
then
CXXFLAGS="$CXXFLAGS /W4"
fi
esac
fi
fi
])

Просмотреть файл

@ -0,0 +1,339 @@
Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cal", "..\samples\cal\cal.vcxproj", "{F7659D77-09CF-4FE9-ACEE-927287AA9509}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cintltst", "..\test\cintltst\cintltst.vcxproj", "{3D1246AE-1B32-479B-BECA-AEFA97BE2321}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "common", "..\common\common.vcxproj", "{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ctestfw", "..\tools\ctestfw\ctestfw.vcxproj", "{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "date", "..\samples\date\date.vcxproj", "{38B5751A-C6F9-4409-950C-F4F9DA17275F}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "derb", "..\tools\genrb\derb.vcxproj", "{D3065ADB-8820-4CC7-9B6C-9510833961A3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genbrk", "..\tools\genbrk\genbrk.vcxproj", "{C2BE5000-7501-4E87-9724-B8D82494FAE6}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genccode", "..\tools\genccode\genccode.vcxproj", "{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencmn", "..\tools\gencmn\gencmn.vcxproj", "{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencnval", "..\tools\gencnval\gencnval.vcxproj", "{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "genrb", "..\tools\genrb\genrb.vcxproj", "{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gentest", "..\tools\gentest\gentest.vcxproj", "{77C78066-746F-4EA6-B3FE-B8C8A4A97891}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "i18n", "..\i18n\i18n.vcxproj", "{0178B127-6269-407D-B112-93877BB62776}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "intltest", "..\test\intltest\intltest.vcxproj", "{73632960-B3A6-464D-83A3-4B43365F19B8}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layout", "..\layout\layout.vcxproj", "{C920062A-0647-4553-A3B2-37C58065664B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "layoutex", "..\layoutex\layoutex.vcxproj", "{37FC2C7F-1904-4811-8955-2F478830EAD1}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makeconv", "..\tools\makeconv\makeconv.vcxproj", "{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "makedata", "..\data\makedata.vcxproj", "{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "pkgdata", "..\tools\pkgdata\pkgdata.vcxproj", "{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stubdata", "..\stubdata\stubdata.vcxproj", "{203EC78A-0531-43F0-A636-285439BDE025}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "toolutil", "..\tools\toolutil\toolutil.vcxproj", "{6B231032-3CB5-4EED-9210-810D666A23A0}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "uconv", "..\extra\uconv\uconv.vcxproj", "{DBA4088D-F6F9-4F8F-8820-082A4765C16C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "io", "..\io\io.vcxproj", "{C2B04507-2521-4801-BF0D-5FD79D6D518C}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gensprep", "..\tools\gensprep\gensprep.vcxproj", "{631C23CE-6C1D-4875-88F0-85E0A42B36EA}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iotest", "..\test\iotest\iotest.vcxproj", "{E4993E82-D68A-46CA-BAE0-9D35E172E46F}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icupkg", "..\tools\icupkg\icupkg.vcxproj", "{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gendict", "..\tools\gendict\gendict.vcxproj", "{9D4211F7-2C77-439C-82F0-30A4E43BA569}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "letest", "..\test\letest\letest.vcxproj", "{67351485-4D18-4245-BE39-A7EF0675ACD2}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gencfu", "..\tools\gencfu\gencfu.vcxproj", "{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gennorm2", "..\tools\gennorm2\gennorm2.vcxproj", "{C7891A65-80AB-4245-912E-5F1E17B0E6C4}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "icuinfo", "..\tools\icuinfo\icuinfo.vcxproj", "{E7611F49-F088-4175-9446-6111444E72C8}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "testplug", "..\tools\icuinfo\testplug.vcxproj", "{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}"
EndProject
Global
GlobalSection(SubversionScc) = preSolution
Svn-Managed = True
Manager = AnkhSVN - Subversion Support for Visual Studio
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.ActiveCfg = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|Win32.Build.0 = Debug|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.ActiveCfg = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Debug|x64.Build.0 = Debug|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.ActiveCfg = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|Win32.Build.0 = Release|Win32
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.ActiveCfg = Release|x64
{F7659D77-09CF-4FE9-ACEE-927287AA9509}.Release|x64.Build.0 = Release|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.ActiveCfg = Debug|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|Win32.Build.0 = Debug|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.ActiveCfg = Debug|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Debug|x64.Build.0 = Debug|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.ActiveCfg = Release|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|Win32.Build.0 = Release|Win32
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.ActiveCfg = Release|x64
{3D1246AE-1B32-479B-BECA-AEFA97BE2321}.Release|x64.Build.0 = Release|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.ActiveCfg = Debug|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|Win32.Build.0 = Debug|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.ActiveCfg = Debug|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Debug|x64.Build.0 = Debug|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.ActiveCfg = Release|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|Win32.Build.0 = Release|Win32
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.ActiveCfg = Release|x64
{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}.Release|x64.Build.0 = Release|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.ActiveCfg = Debug|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|Win32.Build.0 = Debug|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.ActiveCfg = Debug|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Debug|x64.Build.0 = Debug|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.ActiveCfg = Release|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|Win32.Build.0 = Release|Win32
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.ActiveCfg = Release|x64
{ECA6B435-B4FA-4F9F-BF95-F451D078FC47}.Release|x64.Build.0 = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.ActiveCfg = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|Win32.Build.0 = Debug|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.ActiveCfg = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Debug|x64.Build.0 = Debug|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.ActiveCfg = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|Win32.Build.0 = Release|Win32
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.ActiveCfg = Release|x64
{38B5751A-C6F9-4409-950C-F4F9DA17275F}.Release|x64.Build.0 = Release|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.ActiveCfg = Debug|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|Win32.Build.0 = Debug|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.ActiveCfg = Debug|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Debug|x64.Build.0 = Debug|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.ActiveCfg = Release|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|Win32.Build.0 = Release|Win32
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.ActiveCfg = Release|x64
{D3065ADB-8820-4CC7-9B6C-9510833961A3}.Release|x64.Build.0 = Release|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.ActiveCfg = Debug|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|Win32.Build.0 = Debug|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.ActiveCfg = Debug|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Debug|x64.Build.0 = Debug|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.ActiveCfg = Release|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|Win32.Build.0 = Release|Win32
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.ActiveCfg = Release|x64
{C2BE5000-7501-4E87-9724-B8D82494FAE6}.Release|x64.Build.0 = Release|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.ActiveCfg = Debug|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|Win32.Build.0 = Debug|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.ActiveCfg = Debug|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Debug|x64.Build.0 = Debug|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.ActiveCfg = Release|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|Win32.Build.0 = Release|Win32
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.ActiveCfg = Release|x64
{FDD3C4F2-9805-44EB-9A77-BC1C1C95B547}.Release|x64.Build.0 = Release|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.ActiveCfg = Debug|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|Win32.Build.0 = Debug|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.ActiveCfg = Debug|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Debug|x64.Build.0 = Debug|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.ActiveCfg = Release|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|Win32.Build.0 = Release|Win32
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.ActiveCfg = Release|x64
{A8D36F8D-09E6-4174-91C3-7BEAA9C3F04F}.Release|x64.Build.0 = Release|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.ActiveCfg = Debug|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|Win32.Build.0 = Debug|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.ActiveCfg = Debug|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Debug|x64.Build.0 = Debug|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.ActiveCfg = Release|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|Win32.Build.0 = Release|Win32
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.ActiveCfg = Release|x64
{8B41752B-5A52-41E4-B7E0-07921C0CC6BF}.Release|x64.Build.0 = Release|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.ActiveCfg = Debug|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|Win32.Build.0 = Debug|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.ActiveCfg = Debug|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Debug|x64.Build.0 = Debug|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.ActiveCfg = Release|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|Win32.Build.0 = Release|Win32
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.ActiveCfg = Release|x64
{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}.Release|x64.Build.0 = Release|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.ActiveCfg = Debug|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|Win32.Build.0 = Debug|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.ActiveCfg = Debug|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Debug|x64.Build.0 = Debug|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.ActiveCfg = Release|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|Win32.Build.0 = Release|Win32
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.ActiveCfg = Release|x64
{77C78066-746F-4EA6-B3FE-B8C8A4A97891}.Release|x64.Build.0 = Release|x64
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.ActiveCfg = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Debug|Win32.Build.0 = Debug|Win32
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.ActiveCfg = Debug|x64
{0178B127-6269-407D-B112-93877BB62776}.Debug|x64.Build.0 = Debug|x64
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.ActiveCfg = Release|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|Win32.Build.0 = Release|Win32
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.ActiveCfg = Release|x64
{0178B127-6269-407D-B112-93877BB62776}.Release|x64.Build.0 = Release|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.ActiveCfg = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|Win32.Build.0 = Debug|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.ActiveCfg = Debug|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Debug|x64.Build.0 = Debug|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.ActiveCfg = Release|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|Win32.Build.0 = Release|Win32
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.ActiveCfg = Release|x64
{73632960-B3A6-464D-83A3-4B43365F19B8}.Release|x64.Build.0 = Release|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.ActiveCfg = Debug|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|Win32.Build.0 = Debug|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.ActiveCfg = Debug|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Debug|x64.Build.0 = Debug|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.ActiveCfg = Release|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Release|Win32.Build.0 = Release|Win32
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.ActiveCfg = Release|x64
{C920062A-0647-4553-A3B2-37C58065664B}.Release|x64.Build.0 = Release|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.ActiveCfg = Debug|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|Win32.Build.0 = Debug|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.ActiveCfg = Debug|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Debug|x64.Build.0 = Debug|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.ActiveCfg = Release|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|Win32.Build.0 = Release|Win32
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.ActiveCfg = Release|x64
{37FC2C7F-1904-4811-8955-2F478830EAD1}.Release|x64.Build.0 = Release|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.ActiveCfg = Debug|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|Win32.Build.0 = Debug|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.ActiveCfg = Debug|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Debug|x64.Build.0 = Debug|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.ActiveCfg = Release|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|Win32.Build.0 = Release|Win32
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.ActiveCfg = Release|x64
{F5AD9738-1A3D-4906-B9C4-A7D9CE33DC2C}.Release|x64.Build.0 = Release|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.ActiveCfg = Debug|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|Win32.Build.0 = Debug|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.ActiveCfg = Debug|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Debug|x64.Build.0 = Debug|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.ActiveCfg = Release|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|Win32.Build.0 = Release|Win32
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.ActiveCfg = Release|x64
{D9DF7F2F-93B7-4810-B5CD-96F4F33C079B}.Release|x64.Build.0 = Release|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.ActiveCfg = Debug|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|Win32.Build.0 = Debug|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.ActiveCfg = Debug|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Debug|x64.Build.0 = Debug|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.ActiveCfg = Release|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|Win32.Build.0 = Release|Win32
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.ActiveCfg = Release|x64
{4C8454FE-81D3-4CA3-9927-29BA96F03DAC}.Release|x64.Build.0 = Release|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.ActiveCfg = Debug|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|Win32.Build.0 = Debug|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.ActiveCfg = Debug|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Debug|x64.Build.0 = Debug|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.ActiveCfg = Release|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Release|Win32.Build.0 = Release|Win32
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.ActiveCfg = Release|x64
{203EC78A-0531-43F0-A636-285439BDE025}.Release|x64.Build.0 = Release|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.ActiveCfg = Debug|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|Win32.Build.0 = Debug|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.ActiveCfg = Debug|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Debug|x64.Build.0 = Debug|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.ActiveCfg = Release|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|Win32.Build.0 = Release|Win32
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.ActiveCfg = Release|x64
{6B231032-3CB5-4EED-9210-810D666A23A0}.Release|x64.Build.0 = Release|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.ActiveCfg = Debug|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|Win32.Build.0 = Debug|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.ActiveCfg = Debug|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Debug|x64.Build.0 = Debug|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.ActiveCfg = Release|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|Win32.Build.0 = Release|Win32
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.ActiveCfg = Release|x64
{DBA4088D-F6F9-4F8F-8820-082A4765C16C}.Release|x64.Build.0 = Release|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.ActiveCfg = Debug|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|Win32.Build.0 = Debug|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.ActiveCfg = Debug|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Debug|x64.Build.0 = Debug|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.ActiveCfg = Release|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|Win32.Build.0 = Release|Win32
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.ActiveCfg = Release|x64
{C2B04507-2521-4801-BF0D-5FD79D6D518C}.Release|x64.Build.0 = Release|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.ActiveCfg = Debug|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|Win32.Build.0 = Debug|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.ActiveCfg = Debug|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Debug|x64.Build.0 = Debug|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.ActiveCfg = Release|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|Win32.Build.0 = Release|Win32
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.ActiveCfg = Release|x64
{631C23CE-6C1D-4875-88F0-85E0A42B36EA}.Release|x64.Build.0 = Release|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.ActiveCfg = Debug|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|Win32.Build.0 = Debug|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.ActiveCfg = Debug|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Debug|x64.Build.0 = Debug|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.ActiveCfg = Release|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|Win32.Build.0 = Release|Win32
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.ActiveCfg = Release|x64
{E4993E82-D68A-46CA-BAE0-9D35E172E46F}.Release|x64.Build.0 = Release|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.ActiveCfg = Debug|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|Win32.Build.0 = Debug|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.ActiveCfg = Debug|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Debug|x64.Build.0 = Debug|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.ActiveCfg = Release|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|Win32.Build.0 = Release|Win32
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.ActiveCfg = Release|x64
{62D4B15D-7A90-4ECB-BA19-5E021D6A21BC}.Release|x64.Build.0 = Release|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.ActiveCfg = Debug|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|Win32.Build.0 = Debug|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.ActiveCfg = Debug|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Debug|x64.Build.0 = Debug|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.ActiveCfg = Release|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|Win32.Build.0 = Release|Win32
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.ActiveCfg = Release|x64
{9D4211F7-2C77-439C-82F0-30A4E43BA569}.Release|x64.Build.0 = Release|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.ActiveCfg = Debug|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|Win32.Build.0 = Debug|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.ActiveCfg = Debug|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Debug|x64.Build.0 = Debug|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.ActiveCfg = Release|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|Win32.Build.0 = Release|Win32
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.ActiveCfg = Release|x64
{67351485-4D18-4245-BE39-A7EF0675ACD2}.Release|x64.Build.0 = Release|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.ActiveCfg = Debug|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|Win32.Build.0 = Debug|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.ActiveCfg = Debug|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Debug|x64.Build.0 = Debug|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.ActiveCfg = Release|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|Win32.Build.0 = Release|Win32
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.ActiveCfg = Release|x64
{691EE0C0-DC57-4A48-8AEE-8ED75EB3A057}.Release|x64.Build.0 = Release|x64
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.ActiveCfg = Debug|Win32
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|Win32.Build.0 = Debug|Win32
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.ActiveCfg = Debug|x64
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Debug|x64.Build.0 = Debug|x64
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.ActiveCfg = Release|Win32
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|Win32.Build.0 = Release|Win32
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.ActiveCfg = Release|x64
{C7891A65-80AB-4245-912E-5F1E17B0E6C4}.Release|x64.Build.0 = Release|x64
{E7611F49-F088-4175-9446-6111444E72C8}.Debug|Win32.ActiveCfg = Debug|Win32
{E7611F49-F088-4175-9446-6111444E72C8}.Debug|Win32.Build.0 = Debug|Win32
{E7611F49-F088-4175-9446-6111444E72C8}.Debug|x64.ActiveCfg = Debug|x64
{E7611F49-F088-4175-9446-6111444E72C8}.Debug|x64.Build.0 = Debug|x64
{E7611F49-F088-4175-9446-6111444E72C8}.Release|Win32.ActiveCfg = Release|Win32
{E7611F49-F088-4175-9446-6111444E72C8}.Release|Win32.Build.0 = Release|Win32
{E7611F49-F088-4175-9446-6111444E72C8}.Release|x64.ActiveCfg = Release|x64
{E7611F49-F088-4175-9446-6111444E72C8}.Release|x64.Build.0 = Release|x64
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|Win32.ActiveCfg = Debug|Win32
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|Win32.Build.0 = Debug|Win32
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|x64.ActiveCfg = Debug|x64
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Debug|x64.Build.0 = Debug|x64
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|Win32.ActiveCfg = Release|Win32
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|Win32.Build.0 = Release|Win32
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|x64.ActiveCfg = Release|x64
{659D0C08-D4ED-4BF3-B02B-2D8D4B5A7A7A}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

Просмотреть файл

@ -0,0 +1,128 @@
@echo off
REM ********************************************************************
REM * COPYRIGHT:
REM * Copyright (c) 2010-2012, International Business Machines Corporation
REM * and others. All Rights Reserved.
REM ********************************************************************
REM
REM Runs the ICU test programs and prints a pass or fail summary.
REM   first argument  - architecture directory name. x64 selects bin64 and
REM                     any other value selects bin
REM   second argument - build type directory name. The usage text suggests
REM                     Debug or Release
REM Exits with code 1 when either argument is missing.
set ICU_ARCH=%1
set ICU_DBRL=%2
if "%1" == "" (
echo Usage: %0 "x86 or x64" "Debug or Release"
exit /b 1
)
if "%2" == "" (
echo Usage: %0 %1 "Debug or Release"
exit /b 1
)
REM Remember the original PATH so it can be restored after the tests.
set ICU_OPATH=%PATH%
REM The ICU root is taken to be two directories above this script.
set ICU_ICUDIR="%~dp0"\..\..
REM Put the matching bin directory at the front of PATH.
if "%ICU_ARCH%" == "x64" (
set ICU_BINDIR=%~dp0\..\..\bin64
) else (
set ICU_BINDIR=%~dp0\..\..\bin
)
set PATH=%ICU_BINDIR%;%PATH%
echo testing ICU in %ICU_ICUDIR% arch=%ICU_ARCH% type=%ICU_DBRL%
pushd %ICU_ICUDIR%
@rem factor these out
REM Full path of each test executable under source, selected by the
REM architecture and build type directory names.
set ICUINFO_CMD=%ICU_ICUDIR%\source\tools\icuinfo\%ICU_ARCH%\%ICU_DBRL%\icuinfo.exe
set INTLTEST_CMD=%ICU_ICUDIR%\source\test\intltest\%ICU_ARCH%\%ICU_DBRL%\intltest.exe
set IOTEST_CMD=%ICU_ICUDIR%\source\test\iotest\%ICU_ARCH%\%ICU_DBRL%\iotest.exe
set CINTLTST_CMD=%ICU_ICUDIR%\source\test\cintltst\%ICU_ARCH%\%ICU_DBRL%\cintltst.exe
set LETEST_CMD=%ICU_ICUDIR%\source\test\letest\%ICU_ARCH%\%ICU_DBRL%\letest.exe
REM Bookkeeping: names of failed tests, names of tests run, and a failure flag.
set ICUFAILED=
set ICURUN=
set ICUFAILCNT=0
REM Echo is switched back on so each test command line is shown as it runs.
@echo on
@rem Run icuinfo. An exit code below 1 jumps straight to the label. Otherwise
@rem the test name is added to ICUFAILED and the failure flag is set first.
@rem Extra arguments may be supplied through ICUINFO_OPTS.
@set THT=icuinfo
@echo ==== %THT% =========================================================================
%ICUINFO_CMD% %ICUINFO_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_icuinfo
@rem Reached on success and on failure alike, so every test is listed as run.
@set ICURUN=%ICURUN% %THT%
@rem Run intltest from its own source directory. An exit code below 1 jumps
@rem straight to the label. Otherwise the test name is added to ICUFAILED and
@rem the failure flag is set first. Extra arguments may be supplied through
@rem INTLTEST_OPTS.
@set THT=intltest
@echo ==== %THT% =========================================================================
@cd %ICU_ICUDIR%\source\test\intltest
%INTLTEST_CMD% %INTLTEST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_intltest
@rem Reached on success and on failure alike, so every test is listed as run.
@set ICURUN=%ICURUN% %THT%
@rem Run iotest from its own source directory. An exit code below 1 jumps
@rem straight to the label. Otherwise the test name is added to ICUFAILED and
@rem the failure flag is set first. Extra arguments may be supplied through
@rem IOTEST_OPTS.
@set THT=iotest
@echo ==== %THT% =========================================================================
@cd %ICU_ICUDIR%\source\test\iotest
%IOTEST_CMD% %IOTEST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
@rem The label below differs from the GOTO target only in letter case.
@rem NOTE(review): this relies on cmd matching labels without regard to case.
:OK_IOTEST
@set ICURUN=%ICURUN% %THT%
@rem Run cintltst from its own source directory. An exit code below 1 jumps
@rem straight to the label. Otherwise the test name is added to ICUFAILED and
@rem the failure flag is set first. Extra arguments may be supplied through
@rem CINTLTST_OPTS.
@set THT=cintltst
@echo ==== %THT% =========================================================================
@cd %ICU_ICUDIR%\source\test\cintltst
%CINTLTST_CMD% %CINTLTST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_cintltst
@rem Reached on success and on failure alike, so every test is listed as run.
@set ICURUN=%ICURUN% %THT%
@rem Run letest from its own source directory. An exit code below 1 jumps
@rem straight to the label. Otherwise the test name is added to ICUFAILED and
@rem the failure flag is set first. Extra arguments may be supplied through
@rem LETEST_OPTS.
@set THT=letest
@echo ==== %THT% =========================================================================
@cd %ICU_ICUDIR%\source\test\letest
@rem The executable path is held in LETEST_CMD, which is set during setup.
%LETEST_CMD% %LETEST_OPTS%
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
@set ICUFAILED=%ICUFAILED% %THT%
@set ICUFAILCNT=1
:OK_letest
@rem Reached on success and on failure alike, so every test is listed as run.
@set ICURUN=%ICURUN% %THT%
@echo off
REM clean up
REM Restore the PATH saved before the tests and return to the directory
REM that was current before the pushd.
set PATH=%ICU_OPATH%
REM unset ICU_OPATH
popd
@REM done
REM Print the summary. The exit code is 0 when the failure flag is still 0
REM and 1 otherwise.
echo -
echo -
echo -
echo ============================================================
echo Summary: ICU in %ICU_ICUDIR% arch=%ICU_ARCH% type=%ICU_DBRL%
echo -
echo Tests Run : %ICURUN%
if %ICUFAILCNT% == 0 (
echo " - All Passed!"
exit /b 0
)
echo Failing Tests: %ICUFAILED%
echo -
echo FAILED!
exit /b 1

Просмотреть файл

@ -0,0 +1,225 @@
#******************************************************************************
#
# Copyright (C) 1999-2012, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
## Makefile.in for ICU - icuuc.so
## Stephen F. Booth
## Source directory information
# @srcdir@ and @top_srcdir@ are placeholders that are substituted when
# config.status generates the Makefile from this template.
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ..
## All the flags and other definitions are included here.
include $(top_builddir)/icudefs.mk
## Build directory information
subdir = common
# for service hook
# LOCALSVC_CPP is an optional source file. SVC_HOOK_INC is a makefile fragment
# generated by a rule in this file; it enables the hook when that source file
# exists in the source tree.
LOCALSVC_CPP=localsvc.cpp
SVC_HOOK_INC=$(top_builddir)/common/svchook.mk
## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB) $(SVC_HOOK_INC)
## Target information
# TARGET is the static library archive and is only set when ENABLE_STATIC is
# non-empty. SO_TARGET and ALL_SO_TARGETS name the shared library files and
# are only set when ENABLE_SHARED is non-empty.
TARGET_STUBNAME=$(COMMON_STUBNAME)
ifneq ($(ENABLE_STATIC),)
TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
endif
ifneq ($(ENABLE_SHARED),)
SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
# common.res is linked into the shared object only when version data is enabled.
ifeq ($(ENABLE_SO_VERSION_DATA),1)
SO_VERSION_DATA = common.res
endif
# Additional batch target, defined only when OS390BATCH is 1.
ifeq ($(OS390BATCH),1)
BATCH_TARGET = $(BATCH_COMMON_TARGET)
BATCH_LIBS = $(BATCH_LIBICUDT) -lm
endif # OS390BATCH
endif # ENABLE_SHARED
# Everything built by all-local. Unset variables expand to nothing.
ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS) $(BATCH_TARGET)
# The DYNAMIC* flag variables take the values of the SHAREDLIB* ones.
DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
# Library-specific flags are appended to whatever icudefs.mk already set.
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
ifeq ($(OS390BATCH),1)
CFLAGS += -WI
CXXFLAGS += -WI
endif
CPPFLAGS += -I$(srcdir) $(LIBCPPFLAGS) $(CPPFLAGSICUUC)
# we want DEFS here
DEFS += -DU_COMMON_IMPLEMENTATION
LDFLAGS += $(LDFLAGSICUUC)
# for plugin configuration
# The plugin directory is passed to the compiler as a quoted string macro.
CPPFLAGS += "-DDEFAULT_ICU_PLUGINS=\"$(libdir)/icu\" "
# for icu data location
# The data directory is passed the same way, but only when PKGDATA_MODE is common.
ifeq ($(PKGDATA_MODE),common)
CPPFLAGS += "-DU_ICU_DATA_DEFAULT_DIR=\"$(ICUDATA_DIR)\""
endif
# $(LIBICUDT) is either stub data or the real DLL common data.
LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
# Object files that make up the common library. STATIC_OBJECTS and DEPS are
# derived from this list by suffix substitution, and the generated service
# hook fragment may append one more object to it.
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o \
uinit.o uobject.o cmemory.o charstr.o \
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
bytestream.o stringpiece.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
dictionarydata.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
# Static-library object names and dependency file names are derived from
# OBJECTS by replacing the .o suffix.
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
DEPS = $(OBJECTS:.o=.d)
# Both includes are optional: the leading dash suppresses the error when the
# file does not exist.
-include Makefile.local
-include $(SVC_HOOK_INC)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local install-library install-headers dist \
dist-local check check-local check-exhaustive
## Clear suffix list
.SUFFIXES :
## List of standard targets
# Each standard target forwards to its -local counterpart in this directory.
# check additionally builds everything first.
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
check-exhaustive: check
all-local: $(ALL_TARGETS)
install-local: install-headers install-library
# Install the built libraries into $(DESTDIR)$(libdir).
# Static build: copies $(TARGET).
# Shared build: copies $(FINAL_SO_TARGET), then recreates $(SO_TARGET) and
# $(MIDDLE_SO_TARGET) as symlinks to it whenever their names differ from it.
# When IMPORT_LIB_EXT is set the import libraries are installed and linked
# in the same way.
install-library: all-local
$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
ifneq ($(ENABLE_STATIC),)
$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
endif
ifneq ($(ENABLE_SHARED),)
$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
endif
endif
ifneq ($(IMPORT_LIB_EXT),)
$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
endif
ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
endif
endif
endif
# Generate the service hook fragment. When $(LOCALSVC_CPP) exists in the
# source tree, the fragment defines U_LOCAL_SERVICE_HOOK=1 and appends the
# matching object file to OBJECTS. Otherwise it contains only the trailing
# marker comment. The leading dash and the final "true" keep a missing
# source file from failing the build.
$(SVC_HOOK_INC):
@echo generating $@
@-test -f $(top_srcdir)/common/$(LOCALSVC_CPP) && ( echo "have $(LOCALSVC_CPP) - U_LOCAL_SERVICE_HOOK=1" ; \
echo 'CPPFLAGS +=-DU_LOCAL_SERVICE_HOOK=1' > $@ ; \
echo 'OBJECTS += $(LOCALSVC_CPP:%.cpp=%.o)' >> $@ \
) ; true
@echo "# Autogenerated by Makefile" >> $@
## Copy every file matched by $(HEADERS) into $(DESTDIR)$(includedir)/unicode,
## echoing each install command and stopping at the first failure.
install-headers:
$(MKINSTALLDIRS) $(DESTDIR)$(includedir)/unicode
@for file in $(HEADERS); do \
echo "$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode"; \
$(INSTALL_DATA) $$file $(DESTDIR)$(includedir)/unicode || exit; \
done
## Nothing to do for dist in this directory.
dist-local:
## Remove CLEANFILES (only if the variable is non-empty) and all build outputs.
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS) $(SO_VERSION_DATA)
## In addition to clean-local, remove the generated Makefile, icucfg.h and hook include.
distclean-local: clean-local
$(RMV) Makefile icucfg.h $(SVC_HOOK_INC)
## No tests are run from this directory.
check-local:
## Regenerate this Makefile through config.status when Makefile.in or config.status changes.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status $(SVC_HOOK_INC)
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
## Static library: archive the static objects, then run ranlib on the archive.
ifneq ($(ENABLE_STATIC),)
$(TARGET): $(STATIC_OBJECTS)
$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
$(RANLIB) $@
endif
## Shared library: link all objects plus the version data.
ifneq ($(ENABLE_SHARED),)
$(SHARED_OBJECT): $(OBJECTS) $(SO_VERSION_DATA)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
## With rpath enabled, warn if a library of the same name already exists in $(libdir).
ifeq ($(ENABLE_RPATH),YES)
ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
$(warning RPATH warning: --enable-rpath means test programs may use existing $(libdir)/$(MIDDLE_SO_TARGET))
endif
endif
## Extra link target used only when OS390BATCH is 1; it links against $(BATCH_LIBS).
ifeq ($(OS390BATCH),1)
$(BATCH_TARGET):$(OBJECTS)
$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(BATCH_LIBS)
endif # OS390BATCH
endif # ENABLE_SHARED
## Include the dependency files when no goal was given, or when the goals are
## not exclusively ones whose names end in "clean".
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

Просмотреть файл

@ -0,0 +1,74 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: appendable.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010dec07
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/appendable.h"
#include "unicode/utf16.h"
U_NAMESPACE_BEGIN
// Out-of-line destructor with an empty body.
Appendable::~Appendable() {}
// Appends the code point c by forwarding its UTF-16 code unit(s) to
// appendCodeUnit(). Values up to 0xffff are passed as a single unit; larger
// values are passed as a lead unit followed by a trail unit.
// Returns the result of appendCodeUnit(); for two units, both must succeed.
UBool
Appendable::appendCodePoint(UChar32 c) {
    if(c>0xffff) {
        // && short-circuits: the trail unit is not appended if the lead unit failed.
        return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
    }
    return appendCodeUnit((UChar)c);
}
// Appends a string one code unit at a time via appendCodeUnit().
// length<0: s is read up to, but not including, the first 0 unit.
// length>0: exactly length units are read.
// length==0: nothing is appended.
// Returns FALSE as soon as appendCodeUnit() rejects a unit, otherwise TRUE.
UBool
Appendable::appendString(const UChar *s, int32_t length) {
    if(length<0) {
        for(UChar unit=*s++; unit!=0; unit=*s++) {
            if(!appendCodeUnit(unit)) {
                return FALSE;
            }
        }
        return TRUE;
    }
    if(length>0) {
        const UChar *end=s+length;
        while(s<end) {
            if(!appendCodeUnit(*s++)) {
                return FALSE;
            }
        }
    }
    return TRUE;
}
// Base implementation: ignores the requested capacity and always reports TRUE.
UBool
Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) {
return TRUE;
}
// Base implementation: hands back the caller's scratch buffer.
// If minCapacity<1 or the scratch buffer is smaller than minCapacity, sets
// *resultCapacity to 0 and returns NULL. Otherwise sets *resultCapacity to
// scratchCapacity and returns scratch. The capacity hint is ignored.
UChar *
Appendable::getAppendBuffer(int32_t minCapacity,
                            int32_t /*desiredCapacityHint*/,
                            UChar *scratch, int32_t scratchCapacity,
                            int32_t *resultCapacity) {
    if(minCapacity>=1 && scratchCapacity>=minCapacity) {
        *resultCapacity=scratchCapacity;
        return scratch;
    }
    *resultCapacity=0;
    return NULL;
}
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable)
// UnicodeStringAppendable is implemented in unistr.cpp.
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,732 @@
/*
******************************************************************************
*
* Copyright (C) 2007-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "bmpset.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
// Builds all lookup tables from the parent set's inversion list.
// The list pointer and length are stored as given; the list is not copied.
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
        list(parentList), listLength(parentListLength) {
    uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
    uprv_memset(table7FF, 0, sizeof(table7FF));
    uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));

    /*
     * Set the list indexes for binary searches for
     * U+0800, U+1000, U+2000, .., U+F000, U+10000.
     * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are
     * looked up in the bit tables.
     * The last pair of indexes is for finding supplementary code points.
     */
    const int32_t lastIndex=listLength-1;
    list4kStarts[0]=findCodePoint(0x800, 0, lastIndex);
    for(int32_t block=1; block<=0x10; ++block) {
        // Each search starts where the previous 4k boundary was found.
        list4kStarts[block]=findCodePoint(block<<12, list4kStarts[block-1], lastIndex);
    }
    list4kStarts[0x11]=lastIndex;

    initBits();
    overrideIllegal();
}
// Creates a BMPSet that reuses the already-computed tables of otherBMPSet
// (copied byte for byte) but refers to the inversion list newParentList.
// Nothing is recomputed from the new list.
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
        list(newParentList), listLength(newParentListLength) {
    uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
    uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
    uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
    uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
}
// Empty destructor; this code never deletes the inversion list it points to.
BMPSet::~BMPSet() {
}
/*
 * Set bits in a bit rectangle in "vertical" bit organization.
 * start<limit<=0x800
 *
 * The table has 64 words of 32 bits. For a value v in start..limit-1,
 * the bit number (v>>6) is set in table[v&0x3f].
 *
 * All mask arithmetic is done in uint32_t: with a shift count of 31,
 * (1<<31)-1 computed in signed int would overflow (undefined behavior).
 */
static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
    U_ASSERT(start<limit);
    U_ASSERT(limit<=0x800);

    int32_t lead=start>>6;  // Named for UTF-8 2-byte lead byte with upper 5 bits.
    int32_t trail=start&0x3f;  // Named for UTF-8 2-byte trail byte with lower 6 bits.

    // Set one bit indicating an all-one block.
    uint32_t bits=(uint32_t)1<<lead;
    if((start+1)==limit) {  // Single-character shortcut.
        table[trail]|=bits;
        return;
    }

    int32_t limitLead=limit>>6;
    int32_t limitTrail=limit&0x3f;

    if(lead==limitLead) {
        // Partial vertical bit column.
        while(trail<limitTrail) {
            table[trail++]|=bits;
        }
    } else {
        // Partial vertical bit column,
        // followed by a bit rectangle,
        // followed by another partial vertical bit column.
        if(trail>0) {
            do {
                table[trail++]|=bits;
            } while(trail<64);
            ++lead;
        }
        if(lead<limitLead) {
            // All bits from lead upward ...
            bits=~(((uint32_t)1<<lead)-1);
            if(limitLead<0x20) {
                // ... and below limitLead.
                bits&=((uint32_t)1<<limitLead)-1;
            }
            for(trail=0; trail<64; ++trail) {
                table[trail]|=bits;
            }
        }
        // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
        // In that case, bits=1<<limitLead is undefined but the bits value
        // is not used because trail<limitTrail is already false.
        bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
        for(trail=0; trail<limitTrail; ++trail) {
            table[trail]|=bits;
        }
    }
}
// Fills asciiBytes[], table7FF[] and bmpBlockBits[] from the inversion list.
// The list is read once, front to back, as consecutive [start, limit) ranges;
// the three phases below share start/limit/listIndex, so a range that crosses
// a phase boundary is continued by the next phase.
void BMPSet::initBits() {
UChar32 start, limit;
int32_t listIndex=0;
// Set asciiBytes[].
do {
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
// No limit entry follows this start: use the end of the code point range.
limit=0x110000;
}
if(start>=0x80) {
break;
}
do {
asciiBytes[start++]=1;
} while(start<limit && start<0x80);
} while(limit<=0x80);
// Set table7FF[].
while(start<0x800) {
// Clamp the range to the part below U+0800.
set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800);
if(limit>0x800) {
// The current range continues past U+07FF; carry it into the next phase.
start=0x800;
break;
}
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
}
// Set bmpBlockBits[].
// minStart is the first code point not yet covered by a block that was
// already marked as mixed.
int32_t minStart=0x800;
while(start<0x10000) {
if(limit>0x10000) {
limit=0x10000;
}
if(start<minStart) {
start=minStart;
}
if(start<limit) { // Else: Another range entirely in a known mixed-value block.
if(start&0x3f) {
// Mixed-value block of 64 code points.
// 0x10001 sets both the value bit and the bit 16 positions above it.
start>>=6;
bmpBlockBits[start&0x3f]|=0x10001<<(start>>6);
start=(start+1)<<6; // Round up to the next block boundary.
minStart=start; // Ignore further ranges in this block.
}
if(start<limit) {
if(start<(limit&~0x3f)) {
// Multiple all-ones blocks of 64 code points each.
set32x64Bits(bmpBlockBits, start>>6, limit>>6);
}
if(limit&0x3f) {
// Mixed-value block of 64 code points.
limit>>=6;
bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6);
limit=(limit+1)<<6; // Round up to the next block boundary.
minStart=limit; // Ignore further ranges in this block.
}
}
}
if(limit==0x10000) {
break;
}
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
}
}
/*
 * Override some bits and bytes to the result of contains(FFFD)
 * for faster validity checking at runtime.
 * No need to set 0 values where they were reset to 0 in the constructor
 * and not modified by initBits().
 * (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
 * Need to set 0 values for surrogates D800..DFFF.
 */
void BMPSet::overrideIllegal() {
    // Clears both the value bit and the mixed-block bit for lead byte 0xED.
    const uint32_t clearLeadED=~(0x10001<<0xd);
    int32_t i;

    if(!containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
        // contains(FFFD)==FALSE
        for(i=32; i<64; ++i) {  // Second half of 4k block.
            bmpBlockBits[i]&=clearLeadED;
        }
        return;
    }

    // contains(FFFD)==TRUE
    for(i=0x80; i<0xc0; ++i) {
        asciiBytes[i]=1;
    }

    const uint32_t leadC0C1=3;  // Lead bytes 0xC0 and 0xC1.
    for(i=0; i<64; ++i) {
        table7FF[i]|=leadC0C1;
    }

    const uint32_t leadE0=1;  // Lead byte 0xE0.
    for(i=0; i<32; ++i) {  // First half of 4k block.
        bmpBlockBits[i]|=leadE0;
    }

    const uint32_t setLeadED=1<<0xd;  // Lead byte 0xED.
    for(i=32; i<64; ++i) {  // Second half of 4k block.
        bmpBlockBits[i]=(bmpBlockBits[i]&clearLeadED)|setLeadED;
    }
}
// Binary search in list[] restricted to the index range lo..hi.
// Returns the smallest index i in lo..hi such that c < list[i].
int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
    /* Examples:
                                       findCodePoint(c)
       set              list[]         c=0 1 3 4 7 8
       ===              ==============   ===========
       []               [110000]         0 0 0 0 0 0
       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
       [:Any:]          [0, 110000]      1 1 1 1 1 1
     */

    // Assume list[len - 1] == HIGH and that c is legal (0..HIGH-1).
    if (c < list[lo]) {
        return lo;
    }
    // High runner test.  c is often after the last range, so an
    // initial check for this condition pays off.
    if (lo >= hi || c >= list[hi-1]) {
        return hi;
    }
    // Loop invariants: list[lo] <= c < list[hi].
    // The search is done when the midpoint collapses onto lo.
    for (int32_t mid = (lo + hi) >> 1; mid != lo; mid = (lo + hi) >> 1) {
        if (c < list[mid]) {
            hi = mid;
        } else {
            lo = mid;
        }
    }
    return hi;
}
// Tests whether the set contains c, using the fastest table that covers it:
// asciiBytes[] up to U+007F, table7FF[] up to U+07FF, bmpBlockBits[] for the
// rest of the BMP outside D800..DFFF, and a restricted binary search in the
// inversion list for surrogates, supplementary code points and mixed blocks.
UBool
BMPSet::contains(UChar32 c) const {
    const uint32_t uc=(uint32_t)c;
    if(uc<=0x7f) {
        return (UBool)asciiBytes[c];
    }
    if(uc<=0x7ff) {
        return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
    }
    if(uc<0xd800 || (c>=0xe000 && c<=0xffff)) {
        const int lead=c>>12;
        const uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
        if(twoBits>1) {
            // Mixed block: look up the code point in its 4k block of code points.
            return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
        }
        // All 64 code points with the same bits 15..6
        // are either in the set or not.
        return (UBool)twoBits;
    }
    if(uc<=0x10ffff) {
        // surrogate or supplementary code point
        return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
    }
    // Out-of-range code points get FALSE, consistent with long-standing
    // behavior of UnicodeSet::contains(c).
    return FALSE;
}
/*
 * Check for sufficient length for trail unit for each surrogate pair.
 * Handle single surrogates as surrogate code points as usual in ICU.
 *
 * Scans forward from s and returns a pointer to the first code unit whose
 * code point does not match spanCondition (nonzero: must be contained,
 * zero: must not be contained). If every code point matches, the returned
 * pointer is limit. The first unit is read before any comparison with
 * limit, so the range must not be empty.
 */
const UChar *
BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
do {
c=*s;
if(c<=0x7f) {
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
// (a trail unit, or a lead unit at the end or not followed by a trail unit)
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
// Skip the trail unit; the loop increment skips the lead unit.
++s;
}
} while(++s<limit);
} else {
// span not
// Same structure as above with every containment test inverted.
do {
c=*s;
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
++s;
}
} while(++s<limit);
}
return s;
}
/* Symmetrical with span(). */
// Scans backward from limit toward s. limit is decremented before each read,
// so inside the loops it points at the unit being examined.
// Returns s when every code point matched spanCondition; otherwise returns
// the position just after the first non-matching code point found
// (limit+1 after the break).
const UChar *
BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const {
UChar c, c2;
if(spanCondition) {
// span
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(!asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits==0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
// (a lead unit, or a trail unit at the start or not preceded by a lead unit)
if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
// Step over the lead unit as well.
--limit;
}
if(s==limit) {
return s;
}
}
} else {
// span not
// Same structure as above with every containment test inverted.
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(asciiBytes[c]) {
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=0) {
break;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) {
break;
}
}
} else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) {
// surrogate code point
if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) {
break;
}
} else {
// surrogate pair
if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) {
break;
}
--limit;
}
if(s==limit) {
return s;
}
}
}
return limit+1;
}
/*
 * Precheck for sufficient trail bytes at end of string only once per span.
 * Check validity.
 *
 * Scans the UTF-8 bytes s[0..length) forward and returns a pointer to the
 * first byte of the first character that does not match spanCondition,
 * or the end of the matched span if all characters match.
 * The first byte is read unconditionally, so length must be greater than 0.
 */
const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length;
uint8_t b=*s;
if((int8_t)b>=0) {
// Initial all-ASCII span.
if(spanCondition) {
do {
if(!asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
}
// Remaining length after the ASCII prefix.
length=(int32_t)(limit-s);
}
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
// limit0 is the value returned when the scan reaches limit: the original
// end of the string, or the start of a truncated trailing sequence.
const uint8_t *limit0=limit;
/*
* Make sure that the last 1/2/3/4-byte sequence before limit is complete
* or runs into a lead byte.
* In the span loop compare s with limit only once
* per multi-byte character.
*
* Give a trailing illegal sequence the same value as the result of contains(FFFD),
* including it if that is part of the span, otherwise set limit0 to before
* the truncated sequence.
*/
b=*(limit-1);
if((int8_t)b<0) {
// b>=0x80: lead or trail byte
if(b<0xc0) {
// single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes
limit-=3;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
} else {
// lead byte with no trail bytes
--limit;
if(asciiBytes[0x80]!=spanCondition) {
limit0=limit;
}
}
}
// t1..t3 hold trail bytes minus 0x80; a value of 0x3f or less means the
// byte was in the trail byte range 0x80..0xbf.
uint8_t t1, t2, t3;
while(s<limit) {
b=*s;
if(b<0xc0) {
// ASCII; or trail bytes with the result of contains(FFFD).
if(spanCondition) {
do {
if(!asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
} else {
do {
if(asciiBytes[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
}
}
++s; // Advance past the lead byte.
if(b>=0xe0) {
if(b<0xf0) {
if( /* handle U+0000..U+FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
) {
b&=0xf;
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
if(twoBits<=1) {
// All 64 code points with this lead byte and middle trail byte
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return s-1;
}
} else {
// Look up the code point in its 4k block of code points.
UChar32 c=(b<<12)|(t1<<6)|t2;
if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) {
return s-1;
}
}
s+=2;
continue;
}
} else if( /* handle U+10000..U+10FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
) {
// Give an illegal sequence the same value as the result of contains(FFFD).
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80]
) != spanCondition
) {
return s-1;
}
s+=3;
continue;
}
} else /* 0xc0<=b<0xe0 */ {
if( /* handle U+0000..U+07FF inline */
(t1=(uint8_t)(*s-0x80)) <= 0x3f
) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
return s-1;
}
++s;
continue;
}
}
// Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) {
return s-1;
}
}
return limit0;
}
/*
 * While going backwards through UTF-8 optimize only for ASCII.
 * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not
 * possible to tell from the last byte in a multi-byte sequence how many
 * preceding bytes there should be. Therefore, going backwards through UTF-8
 * is much harder than going forward.
 *
 * Scans s[0..length) backward and returns the index at which the trailing
 * span starts: 0 if every character matched spanCondition, otherwise the
 * index just after the non-matching character.
 * The last byte is read unconditionally, so length must be greater than 0.
 */
int32_t
BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values.
}
uint8_t b;
do {
b=s[--length];
if((int8_t)b>=0) {
// ASCII sub-span
if(spanCondition) {
do {
if(!asciiBytes[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
} else {
do {
if(asciiBytes[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
}
}
// prev is the index of the last byte of the character being examined;
// prev+1 is returned if that character does not match.
int32_t prev=length;
UChar32 c;
if(b<0xc0) {
// trail byte: collect a multi-byte character
c=utf8_prevCharSafeBody(s, 0, &length, b, -1);
if(c<0) {
// Negative result: treat as U+FFFD.
c=0xfffd;
}
} else {
// lead byte in last-trail position
c=0xfffd;
}
// c is a valid code point, not ASCII, not a surrogate
if(c<=0x7ff) {
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
return prev+1;
}
} else if(c<=0xffff) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
return prev+1;
}
} else {
// Look up the code point in its 4k block of code points.
if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) {
return prev+1;
}
}
} else {
if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) {
return prev+1;
}
}
} while(length>0);
return 0;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,161 @@
/*
******************************************************************************
*
* Copyright (C) 2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: bmpset.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2007jan29
* created by: Markus W. Scherer
*/
#ifndef __BMPSET_H__
#define __BMPSET_H__
#include "unicode/utypes.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN
/*
* Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly.
*
* ASCII: Look up bytes.
* 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges.
* Supplementary characters: Binary search in the parent set's inversion list
* (see containsSlow()), restricted via list4kStarts[].
*/
class BMPSet : public UMemory {
public:
/*
* Builds the lookup tables from the parent set's inversion list.
* The list pointer is stored, not copied.
*/
BMPSet(const int32_t *parentList, int32_t parentListLength);
/*
* Copies the lookup tables from otherBMPSet without recomputing them,
* and stores newParentList as the inversion list to search.
*/
BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);
virtual ~BMPSet();
/*
* Returns whether the set contains c.
* Code points above U+10FFFF and negative values yield FALSE.
*/
virtual UBool contains(UChar32 c) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be s<limit and spanCondition==0 or 1.
* @return The string pointer which starts the span.
*/
const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
/*
* Span the initial substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The string pointer which limits the span.
*/
const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
/*
* Span the trailing substring for which each character c has spanCondition==contains(c).
* It must be length>0 and spanCondition==0 or 1.
* @return The start of the span.
*/
int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
private:
/* Fills asciiBytes[], table7FF[] and bmpBlockBits[] from the inversion list. */
void initBits();
/* Adjusts the table entries for illegal UTF-8 and surrogates; see the table comments below. */
void overrideIllegal();
/**
* Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
* binary search is restricted for finding code points in a certain range.
*
* For restricting the search for finding in the range start..end,
* pass in
* lo=findCodePoint(start) and
* hi=findCodePoint(end)
* with 0<=lo<=hi<len.
* findCodePoint(c) defaults to lo=0 and hi=len-1.
*
* @param c a character in a subrange of MIN_VALUE..MAX_VALUE
* @param lo The lowest index to be returned.
* @param hi The highest index to be returned.
* @return the smallest integer i in the range lo..hi,
* inclusive, such that c < list[i]
*/
int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
/*
* Restricted inversion-list lookup: c is contained if the index returned
* by findCodePoint(c, lo, hi) is odd.
*/
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/*
* One byte per ASCII character, or trail byte in lead position.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
*/
UBool asciiBytes[0xc0];
/*
* One bit per code point from U+0000..U+07FF.
* The bits are organized vertically; consecutive code points
* correspond to the same bit positions in consecutive table words.
* With code point parts
* lead=c{10..6}
* trail=c{5..0}
* it is set.contains(c)==(table7FF[trail] bit lead)
*
* Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
* for faster validity checking at runtime.
*/
uint32_t table7FF[64];
/*
* One bit per 64 BMP code points.
* The bits are organized vertically; consecutive 64-code point blocks
* correspond to the same bit position in consecutive table words.
* With code point parts
* lead=c{15..12}
* t1=c{11..6}
* test bits (lead+16) and lead in bmpBlockBits[t1].
* If the upper bit is 0, then the lower bit indicates if contains(c)
* for all code points in the 64-block.
* If the upper bit is 1, then the block is mixed and set.contains(c)
* must be called.
*
* Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
* the result of contains(FFFD) for faster validity checking at runtime.
*/
uint32_t bmpBlockBits[64];
/*
* Inversion list indexes for restricted binary searches in
* findCodePoint(), from
* findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
* U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
* always looked up in the bit tables.
* The last pair of indexes is for finding supplementary code points.
*/
int32_t list4kStarts[18];
/*
* The inversion list of the parent set, for the slower contains() implementation
* for mixed BMP blocks and for supplementary code points.
* The list is terminated with list[listLength-1]=0x110000.
*/
const int32_t *list;
int32_t listLength;
};
// Inversion-list lookup restricted to list indexes lo..hi:
// c is in the set exactly when the index found for it is odd.
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
    const int32_t index=findCodePoint(c, lo, hi);
    return (UBool)(index & 1);
}
U_NAMESPACE_END
#endif

Просмотреть файл

@ -0,0 +1,337 @@
/*
************************************************************************************
* Copyright (C) 2006-2012, International Business Machines Corporation
* and others. All Rights Reserved.
************************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "dictbe.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ures.h"
#include "unicode/udata.h"
#include "unicode/putil.h"
#include "unicode/ustring.h"
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "uvector.h"
#include "umutex.h"
#include "uresimp.h"
#include "ubrkimpl.h"
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
// Constructor and destructor are defined out of line with empty bodies.
LanguageBreakEngine::LanguageBreakEngine() {
}
LanguageBreakEngine::~LanguageBreakEngine() {
}
/*
******************************************************************
*/
// Constructor and destructor are defined out of line with empty bodies.
LanguageBreakFactory::LanguageBreakFactory() {
}
LanguageBreakFactory::~LanguageBreakFactory() {
}
/*
******************************************************************
*/
// Starts with every per-break-type slot empty; sets are created on demand
// by handleCharacter(). The status argument is not used.
UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    int32_t slot = 0;
    while (slot < slotCount) {
        fHandled[slot++] = 0;
    }
}
// Releases every set that handleCharacter() created.
UnhandledEngine::~UnhandledEngine() {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    for (int32_t slot = 0; slot < slotCount; ++slot) {
        // delete on a null pointer is a no-op, so empty slots need no test.
        delete fHandled[slot];
    }
}
// Returns TRUE only if breakType is a valid slot index, a set exists for
// that slot, and the set contains c.
UBool
UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= slotCount) {
        return FALSE;
    }
    if (fHandled[breakType] == 0) {
        return FALSE;
    }
    return fHandled[breakType]->contains(c) ? TRUE : FALSE;
}
// Advances the text iterator over the run of characters that this engine has
// recorded for breakType; it never reports any breaks.
//
// @param text      The text to iterate; its current index is moved.
// @param startPos  Lower bound for the iteration when reverse is TRUE.
// @param endPos    Upper bound for the iteration when reverse is FALSE.
// @param reverse   TRUE to move backward, FALSE to move forward.
// @param breakType Index of the per-type set to test characters against.
// @return Always 0. The foundBreaks stack is not touched.
//
// If breakType is out of range, or no set has been created for it yet
// (handleCharacter() was never called for this type), the iterator is left
// where it is. The original code dereferenced the slot unconditionally.
int32_t
UnhandledEngine::findBreaks( UText *text,
                             int32_t startPos,
                             int32_t endPos,
                             UBool reverse,
                             int32_t breakType,
                             UStack &/*foundBreaks*/ ) const {
    if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
        if (fHandled[breakType] == 0) {
            // Nothing recorded for this break type: no characters to skip.
            return 0;
        }
        UChar32 c = utext_current32(text);
        if (reverse) {
            while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
                c = utext_previous32(text);
            }
        }
        else {
            while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
                utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
                c = utext_current32(text);
            }
        }
    }
    return 0;
}
// Records c for breakType. The set for that type is created on first use;
// if c is not already in it, the set is updated through
// applyIntPropertyValue() with the script property value of c.
// Does nothing for an out-of-range breakType or if the set cannot be allocated.
void
UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
    const int32_t slotCount = (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]));
    if (breakType < 0 || breakType >= slotCount) {
        return;
    }
    if (fHandled[breakType] == 0) {
        fHandled[breakType] = new UnicodeSet();
        if (fHandled[breakType] == 0) {
            return;
        }
    }
    if (fHandled[breakType]->contains(c)) {
        return;
    }
    // Apply the entire script of the character.
    UErrorCode status = U_ZERO_ERROR;
    int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
    fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
}
/*
******************************************************************
*/
// The engine stack is created lazily by getEngineFor(); start without one.
// The status argument is not used.
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
    fEngines = NULL;
}
// Deletes the engine stack, if one was ever created.
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
    // delete on a null pointer is a no-op.
    delete fEngines;
}
U_NAMESPACE_END
U_CDECL_BEGIN
// Element deleter passed to the UStack of engines in getEngineFor().
static void U_CALLCONV _deleteEngine(void *obj) {
    const icu::LanguageBreakEngine *engine = (const icu::LanguageBreakEngine *) obj;
    delete engine;
}
U_CDECL_END
U_NAMESPACE_BEGIN
// Returns an engine that handles character c for breakType, or NULL if none
// is available.
// Steps: (1) under the global mutex, search the existing engines, newest
// first; (2) if there is no engine stack yet, create one and install it under
// the mutex; (3) load a new engine outside the mutex; (4) under the mutex
// again, re-check the stack and push the new engine only if no matching
// engine is found there. An engine that was loaded but not pushed is deleted.
const LanguageBreakEngine *
ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
UBool needsInit;
int32_t i;
const LanguageBreakEngine *lbe = NULL;
UErrorCode status = U_ZERO_ERROR;
// TODO: The global mutex should not be used.
// The global mutex should only be used for short periods.
// A ICULanguageBreakFactory specific mutex should be used.
umtx_lock(NULL);
needsInit = (UBool)(fEngines == NULL);
if (!needsInit) {
// Search from the most recently pushed engine down.
i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
break;
}
lbe = NULL;
}
}
umtx_unlock(NULL);
if (lbe != NULL) {
return lbe;
}
if (needsInit) {
// Allocate the stack outside the mutex, then install it under the mutex.
UStack *engines = new UStack(_deleteEngine, NULL, status);
if (U_SUCCESS(status) && engines == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
else if (U_FAILURE(status)) {
delete engines;
engines = NULL;
}
else {
umtx_lock(NULL);
if (fEngines == NULL) {
fEngines = engines;
engines = NULL;
}
umtx_unlock(NULL);
// Non-NULL here only if fEngines was already set; discard our copy.
delete engines;
}
}
if (fEngines == NULL) {
return NULL;
}
// We didn't find an engine the first time through, or there was no
// stack. Create an engine.
const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
// Now get the lock, and see if someone else has created it in the
// meantime
umtx_lock(NULL);
i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
if (lbe != NULL && lbe->handles(c, breakType)) {
break;
}
lbe = NULL;
}
if (lbe == NULL && newlbe != NULL) {
// NOTE(review): status is not checked after push(); confirm what push()
// does with the element when it fails.
fEngines->push((void *)newlbe, status);
lbe = newlbe;
newlbe = NULL;
}
umtx_unlock(NULL);
// Still non-NULL only if the new engine was not pushed.
delete newlbe;
return lbe;
}
// Creates a break engine for the script of character c.
// A dictionary matcher is loaded for the script first; the engine class is
// then chosen by script (Thai, Khmer, and - unless UCONFIG_NO_NORMALIZATION
// is set - Hangul, Hiragana, Katakana and Han).
// Returns NULL if the script lookup fails, no dictionary matcher is available,
// the script has no engine, or engine construction reports a failure.
const LanguageBreakEngine *
ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
DictionaryMatcher *m = loadDictionaryMatcherFor(code, breakType);
if (m != NULL) {
const LanguageBreakEngine *engine = NULL;
switch(code) {
case USCRIPT_THAI:
engine = new ThaiBreakEngine(m, status);
break;
case USCRIPT_KHMER:
engine = new KhmerBreakEngine(m, status);
break;
#if !UCONFIG_NO_NORMALIZATION
// CJK not available w/o normalization
case USCRIPT_HANGUL:
engine = new CjkBreakEngine(m, kKorean, status);
break;
// use same BreakEngine and dictionary for both Chinese and Japanese
case USCRIPT_HIRAGANA:
case USCRIPT_KATAKANA:
case USCRIPT_HAN:
engine = new CjkBreakEngine(m, kChineseJapanese, status);
break;
#if 0
// TODO: Have to get some characters with script=common handled
// by CjkBreakEngine (e.g. U+309B). Simply subjecting
// them to CjkBreakEngine does not work. The engine has to
// special-case them.
case USCRIPT_COMMON:
{
UBlockCode block = ublock_getCode(code);
if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
engine = new CjkBreakEngine(dict, kChineseJapanese, status);
break;
}
#endif
#endif
default:
break;
}
if (engine == NULL) {
// No engine was created for the matcher, so delete it here.
delete m;
}
else if (U_FAILURE(status)) {
// m is not deleted separately on this path;
// presumably the engine owns it once constructed - verify in dictbe.
delete engine;
engine = NULL;
}
return engine;
}
}
return NULL;
}
// Loads the dictionary for a script and wraps it in a DictionaryMatcher.
// The dictionary file name is read from the "dictionaries" table of the
// break-iterator root bundle, keyed by the script's short name. The name is
// split at its last dot into base name and extension, which are passed to
// udata_open(). The trie type stored in the data selects a bytes-based or
// UChar-based matcher.
// Returns NULL if the resource lookup or the data file open fails, if the
// trie type is neither of the two handled here, or if allocation fails.
// The break type argument is not used.
DictionaryMatcher *
ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script, int32_t /* brkType */) {
UErrorCode status = U_ZERO_ERROR;
// open root from brkitr tree.
UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
// b is passed as the fill-in and reassigned from the result.
b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
int32_t dictnlength = 0;
const UChar *dictfname =
ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
if (U_FAILURE(status)) {
ures_close(b);
return NULL;
}
CharString dictnbuf;
CharString ext;
const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
if (extStart != NULL) {
// Everything after the last dot is the extension; the rest is the name.
int32_t len = (int32_t)(extStart - dictfname);
ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
dictnlength = len;
}
dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
ures_close(b);
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
if (U_SUCCESS(status)) {
// build trie
// The data starts with an array of int32_t indexes.
const uint8_t *data = (const uint8_t *)udata_getMemory(file);
const int32_t *indexes = (const int32_t *)data;
const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
DictionaryMatcher *m = NULL;
if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
const char *characters = (const char *)(data + offset);
m = new BytesDictionaryMatcher(characters, transform, file);
}
else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
const UChar *characters = (const UChar *)(data + offset);
m = new UCharsDictionaryMatcher(characters, file);
}
if (m == NULL) {
// no matcher exists to take ownership - either we are an invalid
// type or memory allocation failed
udata_close(file);
}
return m;
} else if (dictfname != NULL) {
// we don't have a dictionary matcher.
// returning NULL here will cause us to fail to find a dictionary break engine, as expected
// (The status reset below has no effect outside this function: status is a local.)
status = U_ZERO_ERROR;
return NULL;
}
return NULL;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

Просмотреть файл

@ -0,0 +1,289 @@
/**
************************************************************************************
* Copyright (C) 2006-2012, International Business Machines Corporation and others. *
* All Rights Reserved. *
************************************************************************************
*/
#ifndef BRKENG_H
#define BRKENG_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"
#include "unicode/uscript.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
class UStack;
class DictionaryMatcher;
/*******************************************************************
* LanguageBreakEngine
*/
/**
* <p>LanguageBreakEngines implement language-specific knowledge for
* finding text boundaries within a run of characters belonging to a
* specific set. The boundaries will be of a specific kind, e.g. word,
* line, etc.</p>
*
* <p>LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.</p>
*/
class LanguageBreakEngine : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakEngine();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A UStack, passed by reference, for the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* LanguageBreakFactory
*/
/**
* <p>LanguageBreakFactorys find and return a LanguageBreakEngine
* that can determine breaks for characters in a specific set, if
* such an object can be found.</p>
*
* <p>If a LanguageBreakFactory is to be shared between threads,
* appropriate synchronization must be used; there is none internal
* to the factory.</p>
*
* <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
* normally be shared between threads without synchronization, unless
* the specific subclass of LanguageBreakFactory indicates otherwise.</p>
*
* <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
* it returns when it itself is deleted, unless the specific subclass of
* LanguageBreakFactory indicates otherwise. Naturally, the factory should
* not be deleted until the LanguageBreakEngines it has returned are no
* longer needed.</p>
*/
class LanguageBreakFactory : public UMemory {
public:
/**
* <p>Default constructor.</p>
*
*/
LanguageBreakFactory();
/**
* <p>Virtual destructor.</p>
*/
virtual ~LanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* <p>This is the only operation a concrete factory must implement
* (it is pure virtual here).</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0
* (a null pointer) if there is none.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
};
/*******************************************************************
* UnhandledEngine
*/
/**
* <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
* handles characters that no other LanguageBreakEngine is available to
* handle. It is told the character and the type of break; at its
* discretion it may handle more than the specified character (e.g.,
* the entire script to which that character belongs.</p>
*
* <p>UnhandledEngines may not be shared between threads without
* external synchronization.</p>
*/
class UnhandledEngine : public LanguageBreakEngine {
private:
/**
* The sets of characters handled, for each break type
* (an array of four UnicodeSet pointers).
* @internal
*/
UnicodeSet *fHandled[4];
public:
/**
* <p>Constructor.</p>
*
* @param status An ICU error code, passed by reference.
*/
UnhandledEngine(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~UnhandledEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles(UChar32 c, int32_t breakType) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The
* iterator is left at the end of the run of characters which the engine
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A UStack, passed by reference, for the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>
*
* <p>Unlike handles() and findBreaks(), this member function is not const.</p>
*
* @param c A character which the engine should handle
* @param breakType The type of text break for which the engine should handle c
*/
virtual void handleCharacter(UChar32 c, int32_t breakType);
};
/*******************************************************************
* ICULanguageBreakFactory
*/
/**
* <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
* ICU. It creates dictionary-based LanguageBreakEngines from dictionary
* data in the ICU data file.</p>
*/
class ICULanguageBreakFactory : public LanguageBreakFactory {
private:
/**
* The stack of break engines created by this factory
* @internal
*/
UStack *fEngines;
public:
/**
* <p>Standard constructor.</p>
*
* @param status An ICU error code, passed by reference.
*/
ICULanguageBreakFactory(UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ICULanguageBreakFactory();
/**
* <p>Find and return a LanguageBreakEngine that can find the desired
* kind of break for the set of characters to which the supplied
* character belongs. It is up to the set of available engines to
* determine what the sets of characters are.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
protected:
/**
* <p>Create a LanguageBreakEngine for the set of characters to which
* the supplied character belongs, for the specified break type.</p>
*
* <p>Protected and virtual, so a subclass can replace how engines are
* created.</p>
*
* @param c A character that begins a run for which a LanguageBreakEngine is
* sought.
* @param breakType The kind of text break for which a LanguageBreakEngine is
* sought.
* @return A LanguageBreakEngine with the desired characteristics, or 0.
*/
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
/**
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
* @param script An ISO 15924 script code that identifies the dictionary to be
* created.
* @param breakType The kind of text break for which a dictionary is
* sought.
* @return A DictionaryMatcher with the desired characteristics, or NULL.
*/
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
};
U_NAMESPACE_END
/* BRKENG_H */
#endif

Просмотреть файл

@ -0,0 +1,451 @@
/*
*******************************************************************************
* Copyright (C) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File TXTBDRY.CPP
*
* Modification History:
*
* Date Name Description
* 02/18/97 aliu Converted from OpenClass. Added DONE.
* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
*****************************************************************************************
*/
// *****************************************************************************
// This file was generated from the java source file BreakIterator.java
// *****************************************************************************
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/brkiter.h"
#include "unicode/udata.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "ucln_cmn.h"
#include "cstring.h"
#include "umutex.h"
#include "servloc.h"
#include "locbased.h"
#include "uresimp.h"
#include "uassert.h"
#include "ubrkimpl.h"
// *****************************************************************************
// class BreakIterator
// This class implements methods for finding the location of boundaries in text.
// Instances of BreakIterator maintain a current position and scan over text
// returning the index of characters where boundaries occur.
// *****************************************************************************
U_NAMESPACE_BEGIN
// -------------------------------------
/**
 * Build a RuleBasedBreakIterator for a locale from compiled break rules.
 *
 * The rules file name is read from the "boundaries" table of the locale's
 * break-iterator resource bundle under the key given by type. It is split at
 * its first dot into a data name and a data type, and the data is opened with
 * udata_open().
 *
 * @param loc    The requested locale.
 * @param type   Key inside the "boundaries" table (for example "word").
 * @param kind   Break type stored on the result via setBreakType().
 * @param status ICU error code; nothing is done if it already indicates a
 *               failure, and it is set on any failure here.
 * @return The new break iterator, or NULL on failure.
 */
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY];
    // Initialized so the length checks below never read an indeterminate
    // value when ures_getString() fails without writing to it.
    int32_t size = 0;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules = &brkRulesStack;
    UResourceBundle *brkName = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
       brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            const size_t localeCapacity = sizeof(actualLocale)/sizeof(actualLocale[0]);
            uprv_strncpy(actualLocale,
                ures_getLocaleInternal(brkName, &status),
                localeCapacity);
            // strncpy does not terminate the destination when the source
            // fills it completely, so terminate explicitly.
            actualLocale[localeCapacity - 1] = 0;

            UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = (int)(extStart-brkfname);
                // Copy only the characters that actually follow the dot, and
                // at most what fits in ext with its terminator. Copying a
                // fixed sizeof(ext) characters would read past the end of a
                // short extension and leave ext unterminated for a long one.
                int32_t extLen = size - len - 1;
                if (extLen > (int32_t)sizeof(ext) - 1) {
                    extLen = (int32_t)sizeof(ext) - 1;
                }
                u_UCharsToChars(extStart+1, ext, extLen);
                ext[extLen] = 0;
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // Allocation failed, so nothing took over the data file; close it here.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
// Creates a break iterator for word breaks.
// Forwards to createInstance() with kind UBRK_WORD; key is the requested
// locale and status is passed through unchanged.
BreakIterator* U_EXPORT2
BreakIterator::createWordInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_WORD, status);
}
// -------------------------------------
// Creates a break iterator for line breaks.
// Forwards to createInstance() with kind UBRK_LINE; key is the requested
// locale and status is passed through unchanged.
BreakIterator* U_EXPORT2
BreakIterator::createLineInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_LINE, status);
}
// -------------------------------------
// Creates a break iterator for character breaks.
// Forwards to createInstance() with kind UBRK_CHARACTER; key is the requested
// locale and status is passed through unchanged.
BreakIterator* U_EXPORT2
BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_CHARACTER, status);
}
// -------------------------------------
// Creates a break iterator for sentence breaks.
// Forwards to createInstance() with kind UBRK_SENTENCE; key is the requested
// locale and status is passed through unchanged.
BreakIterator* U_EXPORT2
BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_SENTENCE, status);
}
// -------------------------------------
// Creates a break iterator for title casing breaks.
// Forwards to createInstance() with kind UBRK_TITLE; key is the requested
// locale and status is passed through unchanged.
BreakIterator* U_EXPORT2
BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status)
{
return createInstance(key, UBRK_TITLE, status);
}
// -------------------------------------
// Gets all the available locales that have localized text boundary data.
// This implementation simply returns Locale::getAvailableLocales(count);
// count is an output parameter filled in by that call.
const Locale* U_EXPORT2
BreakIterator::getAvailableLocales(int32_t& count)
{
return Locale::getAvailableLocales(count);
}
// ------------------------------------------
//
// Default constructor and destructor
//
//-------------------------------------------
BreakIterator::BreakIterator()
{
    // Both locale ID strings start out empty, and fBufferClone starts FALSE.
    actualLocale[0] = 0;
    validLocale[0] = 0;
    fBufferClone = FALSE;
}

// Nothing to release at this level.
BreakIterator::~BreakIterator()
{
}
// ------------------------------------------
//
// Registration
//
//-------------------------------------------
#if !UCONFIG_NO_SERVICE
// -------------------------------------
// Resource-bundle-backed factory used by the break iterator service.
// Its handleCreate() builds the object with BreakIterator::makeInstance().
class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
public:
virtual ~ICUBreakIteratorFactory();
protected:
// Create the break iterator for loc and kind; the service argument is unused.
virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
return BreakIterator::makeInstance(loc, kind, status);
}
};
// Defined out of line; the destructor has nothing to do.
ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
// -------------------------------------
// Locale service for break iterators. The constructor registers one
// ICUBreakIteratorFactory; further registrations go through ICULocaleService.
class ICUBreakIteratorService : public ICULocaleService {
public:
ICUBreakIteratorService()
: ICULocaleService(UNICODE_STRING("Break Iterator", 14))
{
// The status of this registration is local and is not reported to the caller.
UErrorCode status = U_ZERO_ERROR;
registerFactory(new ICUBreakIteratorFactory(), status);
}
virtual ~ICUBreakIteratorService();
// Service instances are BreakIterator objects; copy one with clone().
virtual UObject* cloneInstance(UObject* instance) const {
return ((BreakIterator*)instance)->clone();
}
// Build the object directly with makeInstance() from the key's current
// locale and kind. The key is cast to LocaleKey without a type check.
virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
LocaleKey& lkey = (LocaleKey&)key;
int32_t kind = lkey.kind();
Locale loc;
lkey.currentLocale(loc);
return BreakIterator::makeInstance(loc, kind, status);
}
// TRUE when exactly one factory is registered (the constructor registers one).
virtual UBool isDefault() const {
return countFactories() == 1;
}
};
// Defined out of line; the destructor has nothing to do.
ICUBreakIteratorService::~ICUBreakIteratorService() {}
// -------------------------------------
U_NAMESPACE_END
// defined in ucln_cmn.h
// Pointer to the break iterator service. It is set by getService() and
// deleted and reset to NULL by breakiterator_cleanup(); NULL means that no
// service has been created.
static icu::ICULocaleService* gService = NULL;
/**
* Release all static memory held by breakiterator.
*/
U_CDECL_BEGIN
static UBool U_CALLCONV breakiterator_cleanup(void) {
#if !UCONFIG_NO_SERVICE
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete gService;
    gService = NULL;
#endif
    return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
// Return the break iterator service, creating it on first use.
//
// gService is first tested through UMTX_CHECK. If it is NULL, a new
// ICUBreakIteratorService is allocated before the lock is taken; under
// umtx_lock(NULL) it is installed into gService only if gService is still
// NULL, and the cleanup function is registered at that point. A service
// object that was not installed is deleted after the lock is released.
//
// Returns gService, which is NULL if the allocation failed and nothing else
// installed a service.
static ICULocaleService*
getService(void)
{
UBool needsInit;
UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit);
if (needsInit) {
ICULocaleService *tService = new ICUBreakIteratorService();
umtx_lock(NULL);
if (gService == NULL) {
gService = tService;
// Ownership moved to gService; clear the local so the delete below is a no-op.
tService = NULL;
ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup);
}
umtx_unlock(NULL);
delete tService;
}
return gService;
}
// -------------------------------------
// Report whether the break iterator service already exists.
// Only reads gService (through UMTX_CHECK); unlike getService(), it never
// creates the service.
static inline UBool
hasService(void)
{
UBool retVal;
UMTX_CHECK(NULL, gService != NULL, retVal);
return retVal;
}
// -------------------------------------
// Register toAdopt with the break iterator service for the given locale and
// kind. If no service can be obtained, status is set to
// U_MEMORY_ALLOCATION_ERROR and NULL is returned.
URegistryKey U_EXPORT2
BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status)
{
    ICULocaleService *svc = getService();
    if (svc != NULL) {
        return svc->registerInstance(toAdopt, locale, kind, status);
    }
    status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
// -------------------------------------
// Remove a registration made with registerInstance().
// Returns FALSE without further action if status already indicates a
// failure. If no service exists, status is set to U_MEMORY_ALLOCATION_ERROR
// and FALSE is returned.
UBool U_EXPORT2
BreakIterator::unregister(URegistryKey key, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return FALSE;
    }
    if (!hasService()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return FALSE;
    }
    return gService->unregister(key, status);
}
// -------------------------------------
// Return the service's enumeration of available locales, or NULL if the
// service could not be obtained.
StringEnumeration* U_EXPORT2
BreakIterator::getAvailableLocales(void)
{
    ICULocaleService *svc = getService();
    return svc == NULL ? NULL : svc->getAvailableLocales();
}
#endif /* UCONFIG_NO_SERVICE */
// -------------------------------------
// Create a break iterator of the given kind for a locale.
//
// Returns NULL immediately if status already indicates a failure. When the
// service code is compiled in and a service already exists, the object comes
// from gService->get(); otherwise it is built directly by makeInstance().
// Note that the "else" of the service branch sits inside the #if, so with
// UCONFIG_NO_SERVICE only the braced makeInstance() block remains.
BreakIterator*
BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
if (U_FAILURE(status)) {
return NULL;
}
#if !UCONFIG_NO_SERVICE
if (hasService()) {
Locale actualLoc("");
BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status);
// TODO: The way the service code works in ICU 2.8 is that if
// there is a real registered break iterator, the actualLoc
// will be populated, but if the handleDefault path is taken
// (because nothing is registered that can handle the
// requested locale) then the actualLoc comes back empty. In
// that case, the returned object already has its actual/valid
// locale data populated (by makeInstance, which is what
// handleDefault calls), so we don't touch it. YES, A COMMENT
// THIS LONG is a sign of bad code -- so the action item is to
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) {
// A non-empty actualLoc is stored as both locale IDs of the result.
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
}
return result;
}
else
#endif
{
return makeInstance(loc, kind, status);
}
}
// -------------------------------------
// Build a break iterator directly, without going through the service.
// The break kind selects the "boundaries" key passed to buildInstance(); an
// unknown kind sets status to U_ILLEGAL_ARGUMENT_ERROR. Returns NULL whenever
// status indicates a failure.
BreakIterator*
BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return NULL;
    }

    // Map the break kind to the name of its rules entry.
    const char *rulesKey = NULL;
    switch (kind) {
    case UBRK_CHARACTER: rulesKey = "grapheme"; break;
    case UBRK_WORD:      rulesKey = "word";     break;
    case UBRK_LINE:      rulesKey = "line";     break;
    case UBRK_SENTENCE:  rulesKey = "sentence"; break;
    case UBRK_TITLE:     rulesKey = "title";    break;
    default:
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    BreakIterator *iterator = BreakIterator::buildInstance(loc, rulesKey, kind, status);
    return U_FAILURE(status) ? NULL : iterator;
}
// Return the locale of the requested type for this break iterator.
// U_LOCALE_BASED (a macro defined elsewhere) declares locBased for *this;
// the lookup itself is delegated to locBased.getLocale().
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
U_LOCALE_BASED(locBased, *this);
return locBased.getLocale(type, status);
}
// Return the locale ID string of the requested type for this break iterator.
// U_LOCALE_BASED (a macro defined elsewhere) declares locBased for *this;
// the lookup itself is delegated to locBased.getLocaleID().
const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
U_LOCALE_BASED(locBased, *this);
return locBased.getLocaleID(type, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
//eof

Просмотреть файл

@ -0,0 +1,77 @@
// Copyright (C) 2009-2011, International Business Machines
// Corporation and others. All Rights Reserved.
//
// Copyright 2007 Google Inc. All Rights Reserved.
// Author: sanjay@google.com (Sanjay Ghemawat)
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
// Out-of-line destructor definition; there is nothing to release here.
ByteSink::~ByteSink() {}
// Default implementation: always hand back the caller's scratch buffer.
// Fails (NULL, *result_capacity set to 0) when min_capacity is below 1 or
// the scratch buffer is smaller than min_capacity.
char* ByteSink::GetAppendBuffer(int32_t min_capacity,
                                int32_t /*desired_capacity_hint*/,
                                char* scratch, int32_t scratch_capacity,
                                int32_t* result_capacity) {
    const UBool usable = (UBool)(min_capacity >= 1 && scratch_capacity >= min_capacity);
    if (usable) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = 0;
    return NULL;
}
// Default implementation does nothing.
void ByteSink::Flush() {}
// Wrap the caller's buffer outbuf. A negative capacity is treated as 0.
// The sink starts empty: size_ and appended_ are 0 and overflowed_ is FALSE.
CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity)
: outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity),
size_(0), appended_(0), overflowed_(FALSE) {
}
// Nothing is freed here; the destructor body is empty.
CheckedArrayByteSink::~CheckedArrayByteSink() {}
// Return the sink to its empty state so the same buffer can be refilled.
// The buffer pointer and capacity are left unchanged.
CheckedArrayByteSink& CheckedArrayByteSink::Reset() {
    appended_ = 0;
    size_ = 0;
    overflowed_ = FALSE;
    return *this;
}
void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) {
return;
}
appended_ += n;
int32_t available = capacity_ - size_;
if (n > available) {
n = available;
overflowed_ = TRUE;
}
if (n > 0 && bytes != (outbuf_ + size_)) {
uprv_memcpy(outbuf_ + size_, bytes, n);
}
size_ += n;
}
// Offer the unused tail of the output buffer when it can hold min_capacity
// bytes; otherwise fall back to the caller's scratch buffer.
// Fails (NULL, *result_capacity set to 0) when min_capacity is below 1 or
// the scratch buffer is smaller than min_capacity.
char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity,
                                            int32_t /*desired_capacity_hint*/,
                                            char* scratch,
                                            int32_t scratch_capacity,
                                            int32_t* result_capacity) {
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
        *result_capacity = 0;
        return NULL;
    }
    const int32_t room = capacity_ - size_;
    if (room < min_capacity) {
        *result_capacity = scratch_capacity;
        return scratch;
    }
    *result_capacity = room;
    return outbuf_ + size_;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,439 @@
/*
*******************************************************************************
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestrie.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/bytestrie.h"
#include "unicode/uobject.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
// Release ownedArray_ with uprv_free().
BytesTrie::~BytesTrie() {
uprv_free(ownedArray_);
}
// Decode a value from its lead byte and the bytes that follow it.
// leadByte has already been shifted right by 1; pos points at the byte after
// the lead byte. The range of leadByte selects how many following bytes
// (zero to four) contribute to the value.
int32_t
BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
    if(leadByte<kMinTwoByteValueLead) {
        // The value is encoded entirely in the lead byte.
        return leadByte-kMinOneByteValueLead;
    }
    if(leadByte<kMinThreeByteValueLead) {
        return ((leadByte-kMinTwoByteValueLead)<<8)|pos[0];
    }
    if(leadByte<kFourByteValueLead) {
        return ((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
    }
    if(leadByte==kFourByteValueLead) {
        return (pos[0]<<16)|(pos[1]<<8)|pos[2];
    }
    return (pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
}
// Read the variable-length delta stored at pos and return the position that
// lies delta bytes after the end of the encoded delta.
const uint8_t *
BytesTrie::jumpByDelta(const uint8_t *pos) {
    const int32_t lead=pos[0];
    const uint8_t *tail=pos+1;  // first byte after the lead byte
    if(lead<kMinTwoByteDeltaLead) {
        // The lead byte is the whole delta.
        return tail+lead;
    }
    if(lead<kMinThreeByteDeltaLead) {
        return tail+1+(((lead-kMinTwoByteDeltaLead)<<8)|tail[0]);
    }
    if(lead<kFourByteDeltaLead) {
        return tail+2+(((lead-kMinThreeByteDeltaLead)<<16)|(tail[0]<<8)|tail[1]);
    }
    if(lead==kFourByteDeltaLead) {
        return tail+3+((tail[0]<<16)|(tail[1]<<8)|tail[2]);
    }
    return tail+4+((tail[0]<<24)|(tail[1]<<16)|(tail[2]<<8)|tail[3]);
}
// Report the match state at the current position without consuming input.
// A stopped trie (pos_ is NULL) yields USTRINGTRIE_NO_MATCH. A value result
// is reported only when no linear-match bytes are pending and the byte at
// the current position is a value lead.
UStringTrieResult
BytesTrie::current() const {
    const uint8_t *p=pos_;
    if(p==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(remainingMatchLength_<0) {
        int32_t node=*p;
        if(node>=kMinValueLead) {
            return valueResult(node);
        }
    }
    return USTRINGTRIE_NO_VALUE;
}
// Select the sub-node of a branch node that matches inByte.
//
// pos points just after the branch lead byte and length is the value taken
// from that lead byte; a length of 0 means the real value is stored in the
// next byte. On a match pos_ is updated and the result for the new position
// is returned; otherwise the trie is stopped and USTRINGTRIE_NO_MATCH is
// returned.
UStringTrieResult
BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
// Branch according to the current byte.
if(length==0) {
length=*pos++;
}
++length;
// The length of the branch is the number of bytes to select from.
// The data structure encodes a binary search.
while(length>kMaxBranchLinearSubNodeLength) {
if(inByte<*pos++) {
// Lower half: follow the stored delta.
length>>=1;
pos=jumpByDelta(pos);
} else {
// Upper half: it follows directly after the delta.
length=length-(length>>1);
pos=skipDelta(pos);
}
}
// Drop down to linear search for the last few bytes.
// length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
// and divides length by 2.
do {
if(inByte==*pos++) {
UStringTrieResult result;
int32_t node=*pos;
U_ASSERT(node>=kMinValueLead);
if(node&kValueIsFinal) {
// Leave the final value for getValue() to read.
result=USTRINGTRIE_FINAL_VALUE;
} else {
// Use the non-final value as the jump delta.
++pos;
// int32_t delta=readValue(pos, node>>1);
// (readValue() is expanded inline below so that pos can be advanced past
// the value bytes while they are being read.)
node>>=1;
int32_t delta;
if(node<kMinTwoByteValueLead) {
delta=node-kMinOneByteValueLead;
} else if(node<kMinThreeByteValueLead) {
delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
} else if(node<kFourByteValueLead) {
delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
pos+=2;
} else if(node==kFourByteValueLead) {
delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
pos+=3;
} else {
delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
pos+=4;
}
// end readValue()
pos+=delta;
node=*pos;
result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
}
pos_=pos;
return result;
}
--length;
pos=skipValue(pos);
} while(length>1);
// Last candidate byte: it is followed directly by its node, not by a value.
if(inByte==*pos++) {
pos_=pos;
int32_t node=*pos;
return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
stop();
return USTRINGTRIE_NO_MATCH;
}
}
// Match inByte against the node that starts at pos.
//
// Node lead bytes fall into three ranges: below kMinLinearMatch is a branch
// node, below kMinValueLead is a linear-match node, and anything else is a
// value node. Non-final value nodes are skipped; a final value node, or a
// mismatch, stops the trie and returns USTRINGTRIE_NO_MATCH.
UStringTrieResult
BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
return branchNext(pos, node, inByte);
} else if(node<kMinValueLead) {
// Match the first of length+1 bytes.
int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
if(inByte==*pos++) {
remainingMatchLength_=--length;
pos_=pos;
// Report a value only if the linear match is complete (length<0) and a
// value node follows.
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
} else {
// No match.
break;
}
} else if(node&kValueIsFinal) {
// No further matching bytes.
break;
} else {
// Skip intermediate value.
pos=skipValue(pos, node);
// The next node must not also be a value node.
U_ASSERT(*pos<kMinValueLead);
}
}
stop();
return USTRINGTRIE_NO_MATCH;
}
// Advance the trie by one input byte.
// Negative inByte values are mapped into 0..0xff by adding 0x100. Returns
// USTRINGTRIE_NO_MATCH if the trie is already stopped or the byte does not
// continue a matching byte sequence.
UStringTrieResult
BytesTrie::next(int32_t inByte) {
    const uint8_t *p=pos_;
    if(p==NULL) {
        return USTRINGTRIE_NO_MATCH;
    }
    if(inByte<0) {
        inByte+=0x100;
    }
    int32_t remaining=remainingMatchLength_;  // Actual remaining match length minus 1.
    if(remaining<0) {
        // Not inside a linear-match node: interpret the node at p.
        return nextImpl(p, inByte);
    }
    // Remaining part of a linear-match node.
    if(inByte!=*p) {
        stop();
        return USTRINGTRIE_NO_MATCH;
    }
    ++p;
    --remaining;
    remainingMatchLength_=remaining;
    pos_=p;
    if(remaining<0) {
        // The linear match is complete; report a value if one follows.
        int32_t node=*p;
        if(node>=kMinValueLead) {
            return valueResult(node);
        }
    }
    return USTRINGTRIE_NO_VALUE;
}
// Advance the trie by a whole byte sequence.
//
// s is NUL-terminated when sLength<0; otherwise exactly sLength bytes are
// consumed. Empty input returns current(). The outer loop alternates between
// (1) finishing a pending linear-match node while fetching input bytes and
// (2) interpreting the next node for the most recently fetched byte.
// Any mismatch stops the trie and returns USTRINGTRIE_NO_MATCH.
UStringTrieResult
BytesTrie::next(const char *s, int32_t sLength) {
if(sLength<0 ? *s==0 : sLength==0) {
// Empty input.
return current();
}
const uint8_t *pos=pos_;
if(pos==NULL) {
return USTRINGTRIE_NO_MATCH;
}
int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
for(;;) {
// Fetch the next input byte, if there is one.
// Continue a linear-match node without rechecking sLength<0.
int32_t inByte;
if(sLength<0) {
for(;;) {
if((inByte=*s++)==0) {
// End of input: store the state and report the result at this position.
remainingMatchLength_=length;
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
if(length<0) {
// No pending linear match; inByte is handled by the node loop below.
remainingMatchLength_=length;
break;
}
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
}
} else {
for(;;) {
if(sLength==0) {
// End of input: store the state and report the result at this position.
remainingMatchLength_=length;
pos_=pos;
int32_t node;
return (length<0 && (node=*pos)>=kMinValueLead) ?
valueResult(node) : USTRINGTRIE_NO_VALUE;
}
inByte=*s++;
--sLength;
if(length<0) {
// No pending linear match; inByte is handled by the node loop below.
remainingMatchLength_=length;
break;
}
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
}
}
// Interpret nodes until inByte has been consumed by a linear-match node
// (break back to the outer loop) or the input or the match ends (return).
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
UStringTrieResult result=branchNext(pos, node, inByte);
if(result==USTRINGTRIE_NO_MATCH) {
return USTRINGTRIE_NO_MATCH;
}
// Fetch the next input byte, if there is one.
if(sLength<0) {
if((inByte=*s++)==0) {
return result;
}
} else {
if(sLength==0) {
return result;
}
inByte=*s++;
--sLength;
}
if(result==USTRINGTRIE_FINAL_VALUE) {
// No further matching bytes.
stop();
return USTRINGTRIE_NO_MATCH;
}
pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
} else if(node<kMinValueLead) {
// Match length+1 bytes.
length=node-kMinLinearMatch; // Actual match length minus 1.
if(inByte!=*pos) {
stop();
return USTRINGTRIE_NO_MATCH;
}
++pos;
--length;
break;
} else if(node&kValueIsFinal) {
// No further matching bytes.
stop();
return USTRINGTRIE_NO_MATCH;
} else {
// Skip intermediate value.
pos=skipValue(pos, node);
// The next node must not also be a value node.
U_ASSERT(*pos<kMinValueLead);
}
}
}
}
// Check whether all values reachable from a branch are the same.
//
// pos points at the first comparison byte of the branch and length is the
// number of bytes to select from. haveUniqueValue and uniqueValue carry the
// value seen so far; uniqueValue is updated when the first value is found.
// Returns NULL as soon as two different values are seen; otherwise returns
// the position just after the last comparison byte.
const uint8_t *
BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
UBool haveUniqueValue, int32_t &uniqueValue) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
// Recurse into the half reached through the delta, then continue with the
// half that follows the delta.
if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
return NULL;
}
length=length-(length>>1);
pos=skipDelta(pos);
}
do {
++pos; // ignore a comparison byte
// handle its value
int32_t node=*pos++;
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
if(isFinal) {
if(haveUniqueValue) {
if(value!=uniqueValue) {
return NULL;
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
}
} else {
// A non-final value is a jump delta to the sub-node; search from there.
if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
return NULL;
}
haveUniqueValue=TRUE;
}
} while(--length>1);
return pos+1; // ignore the last comparison byte
}
// Walk the trie from pos and determine whether every value reachable from
// there is the same.
//
// haveUniqueValue and uniqueValue carry the value seen so far; uniqueValue
// receives the first value found. Returns FALSE as soon as a differing value
// is seen, and TRUE when a final value is reached without a conflict.
UBool
BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
for(;;) {
int32_t node=*pos++;
if(node<kMinLinearMatch) {
// Branch node; a lead byte of 0 means the length is in the next byte.
if(node==0) {
node=*pos++;
}
pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
if(pos==NULL) {
return FALSE;
}
haveUniqueValue=TRUE;
} else if(node<kMinValueLead) {
// linear-match node
pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
} else {
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
if(haveUniqueValue) {
if(value!=uniqueValue) {
return FALSE;
}
} else {
uniqueValue=value;
haveUniqueValue=TRUE;
}
if(isFinal) {
return TRUE;
}
pos=skipValue(pos, node);
}
}
}
// Append to out every byte that can follow the current position.
//
// Returns the number of such bytes: 0 if the trie is stopped or sits on a
// final value, 1 inside or at the start of a linear-match node, and the
// branch length at a branch node.
int32_t
BytesTrie::getNextBytes(ByteSink &out) const {
const uint8_t *pos=pos_;
if(pos==NULL) {
return 0;
}
if(remainingMatchLength_>=0) {
append(out, *pos); // Next byte of a pending linear-match node.
return 1;
}
int32_t node=*pos++;
if(node>=kMinValueLead) {
if(node&kValueIsFinal) {
return 0;
} else {
// Skip the intermediate value and look at the node that follows it.
pos=skipValue(pos, node);
node=*pos++;
U_ASSERT(node<kMinValueLead);
}
}
if(node<kMinLinearMatch) {
// Branch node; a lead byte of 0 means the length is in the next byte.
if(node==0) {
node=*pos++;
}
getNextBranchBytes(pos, ++node, out);
return node;
} else {
// First byte of the linear-match node.
append(out, *pos);
return 1;
}
}
void
BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
getNextBranchBytes(jumpByDelta(pos), length>>1, out);
length=length-(length>>1);
pos=skipDelta(pos);
}
do {
append(out, *pos++);
pos=skipValue(pos);
} while(--length>1);
append(out, *pos);
}
// Appends the single byte c (truncated to char) to out.
void
BytesTrie::append(ByteSink &out, int c) {
    const char oneByte=(char)c;
    out.Append(&oneByte, 1);
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,501 @@
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestriebuilder.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010sep25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/stringpiece.h"
#include "charstr.h"
#include "cmemory.h"
#include "uhash.h"
#include "uarrsort.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
/*
* Note: This builder implementation stores (bytes, value) pairs with full copies
* of the byte sequences, until the BytesTrie is built.
* It might(!) take less memory if we collected the data in a temporary, dynamic trie.
*/
class BytesTrieElement : public UMemory {
public:
// Use compiler's default constructor, initializes nothing.
void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
StringPiece getString(const CharString &strings) const {
int32_t offset=stringOffset;
int32_t length;
if(offset>=0) {
length=(uint8_t)strings[offset++];
} else {
offset=~offset;
length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
offset+=2;
}
return StringPiece(strings.data()+offset, length);
}
int32_t getStringLength(const CharString &strings) const {
int32_t offset=stringOffset;
if(offset>=0) {
return (uint8_t)strings[offset];
} else {
offset=~offset;
return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
}
}
char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
int32_t getValue() const { return value; }
int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
private:
const char *data(const CharString &strings) const {
int32_t offset=stringOffset;
if(offset>=0) {
++offset;
} else {
offset=~offset+2;
}
return strings.data()+offset;
}
// If the stringOffset is non-negative, then the first strings byte contains
// the string length.
// If the stringOffset is negative, then the first two strings bytes contain
// the string length (big-endian), and the offset needs to be bit-inverted.
// (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
int32_t stringOffset;
int32_t value;
};
void
BytesTrieElement::setTo(const StringPiece &s, int32_t val,
CharString &strings, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
int32_t length=s.length();
if(length>0xffff) {
// Too long: We store the length in 1 or 2 bytes.
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
int32_t offset=strings.length();
if(length>0xff) {
offset=~offset;
strings.append((char)(length>>8), errorCode);
}
strings.append((char)length, errorCode);
stringOffset=offset;
value=val;
strings.append(s, errorCode);
}
// Compares the two elements' byte strings: negative, zero or positive
// according to uprv_memcmp() over the common prefix, with the length
// difference as the tie-breaker.
int32_t
BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
    // TODO: add StringPiece::compare(), see ticket #8187
    StringPiece mine=getString(strings);
    StringPiece theirs=other.getString(strings);
    const int32_t lengthDelta=mine.length()-theirs.length();
    const int32_t sharedLength= lengthDelta<=0 ? mine.length() : theirs.length();
    const int32_t byteOrder=uprv_memcmp(mine.data(), theirs.data(), sharedLength);
    if(byteOrder!=0) {
        return byteOrder;
    }
    return lengthDelta;
}
// Creates an empty builder. The shared string storage is only allocated when
// errorCode does not already indicate a failure; a failed allocation sets
// U_MEMORY_ALLOCATION_ERROR.
BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
          bytes(NULL), bytesCapacity(0), bytesLength(0) {
    if(U_SUCCESS(errorCode)) {
        strings=new CharString();
        if(strings==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
// Releases the serialized bytes, the element array and the string storage.
BytesTrieBuilder::~BytesTrieBuilder() {
    uprv_free(bytes);    // allocated with uprv_malloc()
    delete[] elements;   // allocated with new[]
    delete strings;      // allocated with new
}
// Adds a (byte string, value) pair to the set of elements to be built.
// s          the bytes of the key; at most 0xffff bytes (see setTo())
// value      the value associated with s
// errorCode  in/out ICU error code; nothing is done if it already indicates
//            a failure. Set to U_NO_WRITE_PERMISSION if the trie has already
//            been built, or U_MEMORY_ALLOCATION_ERROR if the element array
//            cannot be grown.
// Returns *this.
BytesTrieBuilder &
BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(bytesLength>0) {
        // Cannot add elements after building.
        errorCode=U_NO_WRITE_PERMISSION;
        return *this;
    }
    if(elementsLength==elementsCapacity) {
        // Grow the element array: start with 1024 slots, then quadruple.
        int32_t newCapacity;
        if(elementsCapacity==0) {
            newCapacity=1024;
        } else {
            newCapacity=4*elementsCapacity;
        }
        BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
        if(newElements==NULL) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
            return *this; // error instead of dereferencing null
        }
        if(elementsLength>0) {
            uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
        }
        delete[] elements;
        elements=newElements;
        elementsCapacity=newCapacity;
    }
    // Only count the element once it has really been filled in: when setTo()
    // fails (for example because s is too long) it returns without
    // initializing the element, and such a slot must not become part of the
    // element list that is later sorted and serialized.
    elements[elementsLength].setTo(s, value, *strings, errorCode);
    if(U_SUCCESS(errorCode)) {
        ++elementsLength;
    }
    return *this;
}
U_CDECL_BEGIN
// Comparison callback for uprv_sortArray(): context is the CharString holding
// all element strings, left and right point to BytesTrieElement objects.
static int32_t U_CALLCONV
compareElementStrings(const void *context, const void *left, const void *right) {
    const BytesTrieElement &lhs=*static_cast<const BytesTrieElement *>(left);
    const BytesTrieElement &rhs=*static_cast<const BytesTrieElement *>(right);
    const CharString &pool=*static_cast<const CharString *>(context);
    return lhs.compareStringTo(rhs, pool);
}
U_CDECL_END
// Builds the trie and returns a new BytesTrie that takes over the serialized
// byte array. Returns NULL and leaves the error in errorCode on failure.
BytesTrie *
BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    // The trie was written back to front, so it occupies the end of the array.
    BytesTrie *trie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
    if(trie==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    // The new trie now owns the array.
    bytes=NULL;
    bytesCapacity=0;
    return trie;
}
// Builds the trie and returns a StringPiece that refers to the serialized
// bytes inside this builder's buffer. An empty, default-constructed
// StringPiece is returned when building fails.
StringPiece
BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
    buildBytes(buildOption, errorCode);
    StringPiece serialized;
    if(U_FAILURE(errorCode)) {
        return serialized;
    }
    // The trie occupies the last bytesLength bytes of the buffer.
    serialized.set(bytes+(bytesCapacity-bytesLength), bytesLength);
    return serialized;
}
// Sorts the collected elements, rejects duplicates and serializes the trie
// into the bytes buffer. Does nothing if a serialized trie is already present.
// Errors are reported through errorCode, among them:
// - U_INDEX_OUTOFBOUNDS_ERROR if no element has been added,
// - U_ILLEGAL_ARGUMENT_ERROR if two elements have the same byte string,
// - U_MEMORY_ALLOCATION_ERROR if the buffer cannot be allocated or grown.
void
BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return;
}
if(bytes!=NULL && bytesLength>0) {
// Already built.
return;
}
// bytesLength>0 with bytes==NULL is the state left behind by build() after
// it handed the buffer to a BytesTrie; sorting and the duplicate check are
// only done when nothing has been serialized yet.
if(bytesLength==0) {
if(elementsLength==0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
compareElementStrings, strings,
FALSE, // need not be a stable sort
&errorCode);
if(U_FAILURE(errorCode)) {
return;
}
// Duplicate strings are not allowed.
// After sorting, equal strings are adjacent, so comparing neighbors suffices.
StringPiece prev=elements[0].getString(*strings);
for(int32_t i=1; i<elementsLength; ++i) {
StringPiece current=elements[i].getString(*strings);
if(prev==current) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
prev=current;
}
}
// Create and byte-serialize the trie for the elements.
bytesLength=0;
// Initial buffer size: the total size of the stored strings, at least 1024.
int32_t capacity=strings->length();
if(capacity<1024) {
capacity=1024;
}
if(bytesCapacity<capacity) {
uprv_free(bytes);
bytes=static_cast<char *>(uprv_malloc(capacity));
if(bytes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
bytesCapacity=0;
return;
}
bytesCapacity=capacity;
}
StringTrieBuilder::build(buildOption, elementsLength, errorCode);
// ensureCapacity() sets bytes to NULL when growing the buffer fails.
if(bytes==NULL) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
// Forgets all added elements and any serialized trie so that the builder can
// be reused; the allocated arrays are kept. Returns *this.
BytesTrieBuilder &
BytesTrieBuilder::clear() {
    bytesLength=0;
    elementsLength=0;
    strings->clear();
    return *this;
}
// Returns the length in bytes of the string of element i.
int32_t
BytesTrieBuilder::getElementStringLength(int32_t i) const {
    const BytesTrieElement &element=elements[i];
    return element.getStringLength(*strings);
}
// Returns the byte at byteIndex of element i's string as an unsigned value.
UChar
BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
    const char rawByte=elements[i].charAt(byteIndex, *strings);
    return (uint8_t)rawByte;
}
// Returns the value that was added together with element i's string.
int32_t
BytesTrieBuilder::getElementValue(int32_t i) const {
    const BytesTrieElement &element=elements[i];
    return element.getValue();
}
// Starting behind byteIndex, finds the first index at which the strings of
// elements first and last differ, or the length of first's string if that is
// reached earlier. Only the first and last element are compared.
int32_t
BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
    const BytesTrieElement &lowElement=elements[first];
    const BytesTrieElement &highElement=elements[last];
    const int32_t limit=lowElement.getStringLength(*strings);
    for(;;) {
        ++byteIndex;
        if(byteIndex>=limit) {
            break;
        }
        if(lowElement.charAt(byteIndex, *strings)!=highElement.charAt(byteIndex, *strings)) {
            break;
        }
    }
    return byteIndex;
}
// Counts the runs of equal bytes at byteIndex among elements [start, limit).
// The element at start is always examined, so the result is at least 1.
int32_t
BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
    int32_t runCount=0;  // Number of different bytes at byteIndex.
    int32_t index=start;
    for(;;) {
        const char unit=elements[index].charAt(byteIndex, *strings);
        ++index;
        // Skip the remaining elements that share this byte.
        for(; index<limit; ++index) {
            if(elements[index].charAt(byteIndex, *strings)!=unit) {
                break;
            }
        }
        ++runCount;
        if(index>=limit) {
            break;
        }
    }
    return runCount;
}
// Starting at element i, skips count runs of elements that share the same byte
// at byteIndex and returns the index of the element behind the last run.
// There is no upper bound check on the element index.
int32_t
BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
    for(;;) {
        const char unit=elements[i].charAt(byteIndex, *strings);
        // Advance past every element of this run.
        do {
            ++i;
        } while(elements[i].charAt(byteIndex, *strings)==unit);
        if(--count<=0) {
            break;
        }
    }
    return i;
}
// Starting at element i, returns the index of the first element whose byte at
// byteIndex differs from byte. There is no upper bound check on the index.
int32_t
BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
    const char wanted=(char)byte;
    for(; elements[i].charAt(byteIndex, *strings)==wanted; ++i) {
    }
    return i;
}
// Creates a linear-match node for len bytes starting at bytes and mixes the
// hash of those bytes into the node's hash code. The bytes are not copied.
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
        : LinearMatchNode(len, nextNode), s(bytes) {
    const int32_t bytesHash=ustr_hashCharsN(bytes, len);
    hash=hash*37+bytesHash;
}
// Two linear-match nodes are equal if the base class considers them equal and
// their match bytes are identical.
UBool
BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
    if(this!=&other) {
        if(!LinearMatchNode::operator==(other)) {
            return FALSE;
        }
        const BTLinearMatchNode &that=(const BTLinearMatchNode &)other;
        if(uprv_memcmp(s, that.s, length)!=0) {
            return FALSE;
        }
    }
    return TRUE;
}
// Serializes this node. The builder writes back to front, so the following
// node is written first, then the match bytes, then the lead byte that
// encodes the match length; the resulting offset is stored in this node.
void
BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
    BytesTrieBuilder &bytesBuilder=(BytesTrieBuilder &)builder;
    next->write(builder);
    bytesBuilder.write(s, length);
    const int32_t leadByte=bytesBuilder.getMinLinearMatch()+length-1;
    offset=bytesBuilder.write(leadByte);
}
// Creates a linear-match node for length bytes of element i's string,
// starting at byteIndex. The node refers to the bytes in the builder's string
// storage rather than copying them.
StringTrieBuilder::Node *
BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
                                        Node *nextNode) const {
    const char *matchBytes=elements[i].getString(*strings).data()+byteIndex;
    return new BTLinearMatchNode(matchBytes, length, nextNode);
}
// Makes sure that the bytes buffer can hold length bytes, growing it by
// repeated doubling if necessary. The already written bytes stay at the end
// of the buffer. Returns FALSE if there is no buffer or growing it failed;
// in the latter case the old buffer is released as well.
UBool
BytesTrieBuilder::ensureCapacity(int32_t length) {
    if(bytes==NULL) {
        return FALSE;  // previous memory allocation had failed
    }
    if(length<=bytesCapacity) {
        return TRUE;
    }
    // Double at least once, then until the capacity exceeds length.
    int32_t grownCapacity=2*bytesCapacity;
    while(grownCapacity<=length) {
        grownCapacity*=2;
    }
    char *grownBytes=static_cast<char *>(uprv_malloc(grownCapacity));
    if(grownBytes==NULL) {
        // unable to allocate memory
        uprv_free(bytes);
        bytes=NULL;
        bytesCapacity=0;
        return FALSE;
    }
    // The trie is written back to front: keep the content end-aligned.
    const char *usedStart=bytes+(bytesCapacity-bytesLength);
    uprv_memcpy(grownBytes+(grownCapacity-bytesLength), usedStart, bytesLength);
    uprv_free(bytes);
    bytes=grownBytes;
    bytesCapacity=grownCapacity;
    return TRUE;
}
// Writes one byte in front of the bytes written so far and returns the new
// total length. If the buffer cannot be grown, nothing is written and the
// unchanged length is returned.
int32_t
BytesTrieBuilder::write(int32_t byte) {
    const int32_t grownLength=bytesLength+1;
    if(!ensureCapacity(grownLength)) {
        return bytesLength;
    }
    bytesLength=grownLength;
    bytes[bytesCapacity-bytesLength]=(char)byte;
    return bytesLength;
}
// Writes length bytes from b in front of the bytes written so far and returns
// the new total length. If the buffer cannot be grown, nothing is written and
// the unchanged length is returned.
int32_t
BytesTrieBuilder::write(const char *b, int32_t length) {
    const int32_t grownLength=bytesLength+length;
    if(!ensureCapacity(grownLength)) {
        return bytesLength;
    }
    bytesLength=grownLength;
    char *destination=bytes+(bytesCapacity-bytesLength);
    uprv_memcpy(destination, b, length);
    return bytesLength;
}
// Writes length bytes of element i's string, starting at byteIndex, and
// returns the new total length of the serialized bytes.
int32_t
BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
    const char *source=elements[i].getString(*strings).data()+byteIndex;
    return write(source, length);
}
// Writes the value i as a value node of one to five bytes and returns the new
// total length of the serialized bytes.
// The lead byte is shifted left by one and carries isFinal in its lowest bit.
int32_t
BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
// Small non-negative values fit into the lead byte itself.
if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
}
char intBytes[5];
int32_t length=1;
if(i<0 || i>0xffffff) {
// Negative or large value: lead byte plus all four value bytes, big-endian.
intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
intBytes[1]=(char)((uint32_t)i>>24);
intBytes[2]=(char)((uint32_t)i>>16);
intBytes[3]=(char)((uint32_t)i>>8);
intBytes[4]=(char)i;
length=5;
// } else if(i<=BytesTrie::kMaxOneByteValue) {
// intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
} else {
// Two to four bytes: the top bits of the value either go into the lead byte
// (two- and three-byte forms) or into their own byte (four-byte form); the
// lower bytes are appended at intBytes[length++] on the way out.
if(i<=BytesTrie::kMaxTwoByteValue) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
} else {
if(i<=BytesTrie::kMaxThreeByteValue) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
} else {
intBytes[0]=(char)BytesTrie::kFourByteValueLead;
intBytes[1]=(char)(i>>16);
length=2;
}
intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
}
// Make room for the isFinal flag in bit 0 of the lead byte.
intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
return write(intBytes, length);
}
// Writes the node lead byte and, if hasValue, a non-final value in front of
// it. Returns the total length of the serialized bytes after the last write.
int32_t
BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
    const int32_t afterNode=write(node);
    if(!hasValue) {
        return afterNode;
    }
    return writeValueAndFinal(value, FALSE);
}
// Writes the distance from the current write position to jumpTarget as a
// jump delta of one to five bytes and returns the new total length of the
// serialized bytes.
// jumpTarget is the total-length offset (as returned by an earlier write())
// of the node to jump to; it must not be greater than the current length.
int32_t
BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
    int32_t i=bytesLength-jumpTarget;
    U_ASSERT(i>=0);
    if(i<=BytesTrie::kMaxOneByteDelta) {
        return write(i);
    }
    char intBytes[5];
    // intBytes[0] is always the lead byte. Each following byte is appended at
    // intBytes[length++], most significant first, so that the longer forms do
    // not overwrite one another's bytes or leave array slots unset.
    int32_t length=1;
    if(i<=BytesTrie::kMaxTwoByteDelta) {
        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
    } else {
        if(i<=BytesTrie::kMaxThreeByteDelta) {
            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
        } else {
            if(i<=0xffffff) {
                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
            } else {
                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
                intBytes[length++]=(char)(i>>24);
            }
            intBytes[length++]=(char)(i>>16);
        }
        intBytes[length++]=(char)(i>>8);
    }
    intBytes[length++]=(char)i;
    return write(intBytes, length);
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,210 @@
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: bytestrieiterator.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010nov03
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/stringpiece.h"
#include "charstr.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
// Creates an iterator over the whole serialized trie at trieBytes.
// maxStringLength limits the length of the returned byte sequences when it is
// greater than 0. Allocation failures set U_MEMORY_ALLOCATION_ERROR.
BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(static_cast<const uint8_t *>(trieBytes)),
          pos_(bytes_), initialPos_(bytes_),
          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_SUCCESS(errorCode)) {
        // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
        // a public API header for which we would want it to depend only on
        // other public headers.
        // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
        // via the CharString and UVector32 implementations, so this additional
        // cost is minimal.
        str_=new CharString();
        stack_=new UVector32(errorCode);
        const UBool allocationFailed= str_==NULL || stack_==NULL;
        if(allocationFailed && U_SUCCESS(errorCode)) {
            errorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
// Creates an iterator that starts at the current state of trie.
// maxStringLength limits the length of the returned byte sequences when it is
// greater than 0. If the trie is in the middle of a linear-match node, the
// remaining match bytes (at most maxStringLength of them) are put into the
// string right away.
BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
                              UErrorCode &errorCode)
        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
          remainingMatchLength_(trie.remainingMatchLength_),
          initialRemainingMatchLength_(trie.remainingMatchLength_),
          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    if(U_FAILURE(errorCode)) {
        return;
    }
    str_=new CharString();
    stack_=new UVector32(errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    if(str_==NULL || stack_==NULL) {
        errorCode=U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if(remainingMatchLength_<0) {
        return;  // Not inside a linear-match node.
    }
    // remainingMatchLength_ is the actual remaining match length minus 1.
    int32_t pendingBytes=remainingMatchLength_+1;
    if(maxLength_>0 && pendingBytes>maxLength_) {
        pendingBytes=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
    }
    // Pending linear-match node, append remaining bytes to str_.
    str_->append(reinterpret_cast<const char *>(pos_), pendingBytes, errorCode);
    pos_+=pendingBytes;
    remainingMatchLength_-=pendingBytes;
}
// Releases the state stack and the string buffer owned by the iterator.
BytesTrie::Iterator::~Iterator() {
    delete stack_;
    delete str_;
}
// Puts the iterator back into the state it had right after construction:
// the string is cut back to the initially pending match bytes (limited by
// maxLength_ if that is greater than 0) and the state stack is emptied.
BytesTrie::Iterator &
BytesTrie::Iterator::reset() {
    int32_t keptLength=initialRemainingMatchLength_+1;  // Remaining match length.
    if(maxLength_>0 && keptLength>maxLength_) {
        keptLength=maxLength_;
    }
    str_->truncate(keptLength);
    stack_->setSize(0);
    pos_=initialPos_;
    pos_+=keptLength;
    remainingMatchLength_=initialRemainingMatchLength_-keptLength;
    return *this;
}
// Returns TRUE if there is a current position or saved branch state left.
UBool
BytesTrie::Iterator::hasNext() const {
    if(pos_!=NULL) {
        return TRUE;
    }
    return !stack_->isEmpty();
}
// Advances to the next (byte sequence, value) pair.
// Returns TRUE if a pair was found and stored in sp_/value_, or if the byte
// sequence was cut off at maxLength_ (then value_ is set to -1 by
// truncateAndStop()). Returns FALSE if errorCode already indicates a failure
// or if there is nothing left to iterate.
UBool
BytesTrie::Iterator::next(UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return FALSE;
}
const uint8_t *pos=pos_;
if(pos==NULL) {
if(stack_->isEmpty()) {
return FALSE;
}
// Pop the state off the stack and continue with the next outbound edge of
// the branch node.
// Each saved state is two stack entries: the offset of the position from
// bytes_, and an int with the remaining branch length in the upper 16 bits
// and the string length to restore in the lower 16 bits.
int32_t stackSize=stack_->size();
int32_t length=stack_->elementAti(stackSize-1);
pos=bytes_+stack_->elementAti(stackSize-2);
stack_->setSize(stackSize-2);
str_->truncate(length&0xffff);
length=(int32_t)((uint32_t)length>>16);
if(length>1) {
pos=branchNext(pos, length, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
}
} else {
// Last edge of the branch: only its comparison byte is stored here, and
// the node for this edge follows directly behind it.
str_->append((char)*pos++, errorCode);
}
}
if(remainingMatchLength_>=0) {
// We only get here if we started in a pending linear-match node
// with more than maxLength remaining bytes.
return truncateAndStop();
}
for(;;) {
int32_t node=*pos++;
if(node>=kMinValueLead) {
// Deliver value for the byte sequence so far.
UBool isFinal=(UBool)(node&kValueIsFinal);
value_=readValue(pos, node>>1);
// Stop descending after a final value or once the length limit is reached;
// otherwise remember where to continue on the next call.
if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
pos_=NULL;
} else {
pos_=skipValue(pos, node);
}
sp_.set(str_->data(), str_->length());
return TRUE;
}
if(maxLength_>0 && str_->length()==maxLength_) {
return truncateAndStop();
}
if(node<kMinLinearMatch) {
// Branch node: a lead byte of 0 means the edge count minus one is stored
// in the following byte.
if(node==0) {
node=*pos++;
}
pos=branchNext(pos, node+1, errorCode);
if(pos==NULL) {
return TRUE; // Reached a final value.
}
} else {
// Linear-match node, append length bytes to str_.
int32_t length=node-kMinLinearMatch+1;
if(maxLength_>0 && str_->length()+length>maxLength_) {
// Only part of the match fits: append up to the limit and stop.
str_->append(reinterpret_cast<const char *>(pos),
maxLength_-str_->length(), errorCode);
return truncateAndStop();
}
str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
pos+=length;
}
}
}
// Delivers the string collected so far without a real value (value_ becomes
// -1) and ends the descent from the current position. Always returns TRUE.
UBool
BytesTrie::Iterator::truncateAndStop() {
    value_=-1;  // no real value for str
    sp_.set(str_->data(), str_->length());
    pos_=NULL;
    return TRUE;
}
// Branch node, needs to take the first outbound edge and push state for the rest.
// pos points at the first comparison byte, length is the number of edges.
// Every pushed state consists of two stack entries: the offset of the saved
// position from bytes_, and an int with the number of remaining edges in the
// upper 16 bits and the current string length in the lower 16 bits.
// Returns the position to continue from, or NULL if the first edge led to a
// final value (which is then stored in sp_/value_, with pos_ set to NULL).
const uint8_t *
BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
while(length>kMaxBranchLinearSubNodeLength) {
++pos; // ignore the comparison byte
// Push state for the greater-or-equal edge.
stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
// Follow the less-than edge.
length>>=1;
pos=jumpByDelta(pos);
}
// List of key-value pairs where values are either final values or jump deltas.
// Read the first (key, value) pair.
uint8_t trieByte=*pos++;
int32_t node=*pos++;
UBool isFinal=(UBool)(node&kValueIsFinal);
int32_t value=readValue(pos, node>>1);
pos=skipValue(pos, node);
// Save the position behind this pair together with the number of edges that
// are left, so that next() can continue with the following edge later.
stack_->addElement((int32_t)(pos-bytes_), errorCode);
stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
str_->append((char)trieByte, errorCode);
if(isFinal) {
pos_=NULL;
sp_.set(str_->data(), str_->length());
value_=value;
return NULL;
} else {
// Not final: value is a jump delta relative to the position behind it.
return pos+value;
}
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,577 @@
/*
*****************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*****************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/caniter.h"
#include "unicode/normalizer2.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "hash.h"
#include "normalizer2impl.h"
/**
* This class allows one to iterate through all the strings that are canonically equivalent to a given
* string. For example, here are some sample results:
Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
1: \u0041\u030A\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
2: \u0041\u030A\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
3: \u0041\u030A\u1E0B\u0327
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
4: \u0041\u030A\u1E11\u0307
= {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
5: \u00C5\u0064\u0307\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
6: \u00C5\u0064\u0327\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
7: \u00C5\u1E0B\u0327
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
8: \u00C5\u1E11\u0307
= {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
9: \u212B\u0064\u0307\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
10: \u212B\u0064\u0327\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
11: \u212B\u1E0B\u0327
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
12: \u212B\u1E11\u0307
= {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
*<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
* since it has not been optimized for that situation.
*@author M. Davis
*@draft
*/
// public
U_NAMESPACE_BEGIN
// TODO: add boilerplate methods.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
/**
 * Creates an iterator over the strings that are canonically equivalent to
 * sourceStr.
 *@param sourceStr string to get results for
 *@param status    in/out error code; the source is only set if status
 *                 indicates success and the canonical-iterator data of the
 *                 NFC implementation is available
 */
CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
    pieces(NULL),
    pieces_length(0),
    pieces_lengths(NULL),
    current(NULL),
    current_length(0),
    nfd(*Normalizer2Factory::getNFDInstance(status)),
    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
{
    if(U_FAILURE(status)) {
        return;
    }
    if(nfcImpl.ensureCanonIterData(status)) {
        setSource(sourceStr, status);
    }
}
// Frees the pieces, pieces_lengths and current arrays.
CanonicalIterator::~CanonicalIterator() {
cleanPieces();
}
void CanonicalIterator::cleanPieces() {
int32_t i = 0;
if(pieces != NULL) {
for(i = 0; i < pieces_length; i++) {
if(pieces[i] != NULL) {
delete[] pieces[i];
}
}
uprv_free(pieces);
pieces = NULL;
pieces_length = 0;
}
if(pieces_lengths != NULL) {
uprv_free(pieces_lengths);
pieces_lengths = NULL;
}
if(current != NULL) {
uprv_free(current);
current = NULL;
current_length = 0;
}
}
/**
 * Gets the source string of the iteration.
 *@return a copy of the stored source. NOTE: this is the NFD form of the string
 * that was passed to setSource(), not the original string.
 */
UnicodeString CanonicalIterator::getSource() {
return source;
}
/**
* Resets the iterator so that one can start again from the beginning.
*/
void CanonicalIterator::reset() {
done = FALSE;
for (int i = 0; i < current_length; ++i) {
current[i] = 0;
}
}
/**
 * Builds the next string from the currently selected variant of each segment
 * and then advances the selection for the following call.
 *@return the next string that is canonically equivalent. A bogus string is
 * returned when the iteration is done.
 */
UnicodeString CanonicalIterator::next() {
    if (done) {
        buffer.setToBogus();
        return buffer;
    }

    // delete old contents, then construct the return value
    buffer.remove();
    for (int32_t segment = 0; segment < pieces_length; ++segment) {
        const UnicodeString &variant = pieces[segment][current[segment]];
        buffer.append(variant);
    }

    // find next value for next time: count up like an odometer, starting
    // with the last segment and carrying into the one before it
    int32_t position = current_length - 1;
    while (position >= 0) {
        if (++current[position] < pieces_lengths[position]) {
            break;  // got sequence
        }
        current[position] = 0;
        --position;
    }
    if (position < 0) {
        // every position wrapped around: all combinations have been produced
        done = TRUE;
    }
    return buffer;
}
/**
*@param set the source string to iterate against. This allows the same iterator to be used
* while changing the source string, saving object creation.
*/
void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
int32_t list_length = 0;
UChar32 cp = 0;
int32_t start = 0;
int32_t i = 0;
UnicodeString *list = NULL;
nfd.normalize(newSource, source, status);
if(U_FAILURE(status)) {
return;
}
done = FALSE;
cleanPieces();
// catch degenerate case
if (newSource.length() == 0) {
pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
pieces_length = 1;
current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
current_length = 1;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
current[0] = 0;
pieces[0] = new UnicodeString[1];
pieces_lengths[0] = 1;
if (pieces[0] == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
return;
}
list = new UnicodeString[source.length()];
if (list == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
// i should initialy be the number of code units at the
// start of the string
i = U16_LENGTH(source.char32At(0));
//int32_t i = 1;
// find the segments
// This code iterates through the source string and
// extracts segments that end up on a codepoint that
// doesn't start any decompositions. (Analysis is done
// on the NFD form - see above).
for (; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
if (nfcImpl.isCanonSegmentStarter(cp)) {
source.extract(start, i-start, list[list_length++]); // add up to i
start = i;
}
}
source.extract(start, i-start, list[list_length++]); // add last one
// allocate the arrays, and find the strings that are CE to each segment
pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
pieces_length = list_length;
pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
current_length = list_length;
if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto CleanPartialInitialization;
}
for (i = 0; i < current_length; i++) {
current[i] = 0;
}
// for each segment, get all the combinations that can produce
// it after NFD normalization
for (i = 0; i < pieces_length; ++i) {
//if (PROGRESS) printf("SEGMENT\n");
pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
}
delete[] list;
return;
// Common section to cleanup all local variables and reset object variables.
CleanPartialInitialization:
if (list != NULL) {
delete[] list;
}
cleanPieces();
}
/**
 * Dumb recursive implementation of permutation.
 * TODO: optimize
 * @param source the string to find permutations for
 * @param skipZeros if TRUE, a code point with canonical combining class 0 that
 *        is not at the start of source is never moved to the front
 * @param result receives the permutations; each one is put into the table as
 *        a newly allocated UnicodeString, keyed by its own contents
 * @param status in/out error code; nothing is done if it already indicates a
 *        failure
 */
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
if(U_FAILURE(status)) {
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;
// optimization:
// if zero or one character, just return a set with it
// we check the length first (a single code point takes at most two code
// units) to keep from counting code points all the time
if (source.length() <= 2 && source.countChar32() <= 1) {
UnicodeString *toPut = new UnicodeString(source);
/* test for NULL */
if (toPut == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
result->put(source, toPut, status);
return;
}
// otherwise iterate through the string, and recursively permute all the other characters
UChar32 cp;
Hashtable subpermute(status);
if(U_FAILURE(status)) {
return;
}
// subpermute deletes the strings stored in it when they are removed.
subpermute.setValueDeleter(uprv_deleteUObject);
for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
cp = source.char32At(i);
const UHashElement *ne = NULL;
int32_t el = -1;
UnicodeString subPermuteString = source;
// optimization:
// if the character is canonical combining class zero,
// don't permute it
if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
//System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
continue;
}
subpermute.removeAll();
// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
// The replace() call removes cp from the copy of source before recursing.
permute(subPermuteString.replace(i, U16_LENGTH(cp), NULL, 0), skipZeros, &subpermute, status);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
}
// The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents
// of source at this point.
// prefix this character to all of them
ne = subpermute.nextElement(el);
while (ne != NULL) {
UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
UnicodeString *chStr = new UnicodeString(cp);
//test for NULL
if (chStr == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
//if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr));
// chStr is handed over to result as the stored value.
result->put(*chStr, chStr, status);
ne = subpermute.nextElement(el);
}
}
//return result;
}
// privates
// we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
// Returns a new[]-allocated array of the equivalent strings and stores its
// length in result_len. Returns NULL (0) on failure; result_len is not
// written in that case. If no equivalent string is found at all, status is
// set to U_ILLEGAL_ARGUMENT_ERROR.
UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
Hashtable result(status);
Hashtable permutations(status);
Hashtable basic(status);
if (U_FAILURE(status)) {
return 0;
}
// All three tables delete the strings stored in them.
result.setValueDeleter(uprv_deleteUObject);
permutations.setValueDeleter(uprv_deleteUObject);
basic.setValueDeleter(uprv_deleteUObject);
// The segment is copied into a fixed-size buffer of 256 code units.
UChar USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
getEquivalents2(&basic, USeg, segLen, status);
// now get all the permutations
// add only the ones that are canonically equivalent
// TODO: optimize by not permuting any class zero.
const UHashElement *ne = NULL;
int32_t el = -1;
//Iterator it = basic.iterator();
ne = basic.nextElement(el);
//while (it.hasNext())
while (ne != NULL) {
//String item = (String) it.next();
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
permutations.removeAll();
permute(item, CANITER_SKIP_ZEROES, &permutations, status);
const UHashElement *ne2 = NULL;
int32_t el2 = -1;
//Iterator it2 = permutations.iterator();
ne2 = permutations.nextElement(el2);
//while (it2.hasNext())
while (ne2 != NULL) {
//String possible = (String) it2.next();
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
UnicodeString attempt;
// Keep a permutation only if its NFD form is the segment itself.
nfd.normalize(possible, attempt, status);
// TODO: check if operator == is semantically the same as attempt.equals(segment)
if (attempt==segment) {
//if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
// TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
result.put(possible, new UnicodeString(possible), status); //add(possible);
} else {
//if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
}
ne2 = permutations.nextElement(el2);
}
ne = basic.nextElement(el);
}
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return 0;
}
// convert into a String[] to clean up storage
//String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL;
int32_t resultCount;
if((resultCount = result.count())) {
finalResult = new UnicodeString[resultCount];
if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
}
else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
//result.toArray(finalResult);
// Copy the strings out of the table; the table's own copies are deleted
// when result goes out of scope.
result_len = 0;
el = -1;
ne = result.nextElement(el);
while(ne != NULL) {
finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
ne = result.nextElement(el);
}
return finalResult;
}
/**
 * Fills fillinResult with the segment itself plus, for every code point of the
 * segment that starts some canonical decomposition (per
 * nfcImpl.getCanonStartSet()), each string formed from the text before that
 * code point, the composite candidate, and every remainder produced by
 * extract() for that candidate.
 *
 * @param fillinResult table that receives the strings; each string is used
 *                     both as key and (as a heap-allocated copy) as value
 * @param segment      UTF-16 text of the segment
 * @param segLen       number of UChars in segment
 * @param status       in/out error code; set to U_MEMORY_ALLOCATION_ERROR
 *                     when a UnicodeString cannot be allocated
 * @return fillinResult, or NULL if status indicates a failure
 */
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

    UnicodeString toPut(segment, segLen);
    // Check the allocation explicitly, as is done for toAdd below, so that an
    // out-of-memory condition is reported through status instead of handing
    // a NULL value to the table.
    UnicodeString *toPutValue = new UnicodeString(toPut);
    if (toPutValue == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    fillinResult->put(toPut, toPutValue, status);

    UnicodeSet starts;

    // cycle through all the characters
    UChar32 cp;
    for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        U16_GET(segment, 0, i, segLen, cp);
        if (!nfcImpl.getCanonStartSet(cp, starts)) {
            continue;
        }
        // if so, see which decompositions match
        UnicodeSetIterator iter(starts);
        while (iter.next()) {
            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uprv_deleteUObject);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = -1;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                // remainder is not modified inside this loop, so a reference
                // to the stored string is sufficient (no copy needed).
                const UnicodeString &item = *((UnicodeString *)(ne->value.pointer));
                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
/**
 * See if the decomposition of comp is at segment starting at segmentPos
 * (with canonical rearrangement!)
 * If so, take the remainder, and return the equivalents
 *
 * @param fillinResult table that receives the equivalents of the remainder
 * @param comp         candidate composite code point
 * @param segment      UTF-16 text of the segment
 * @param segLen       number of UChars in segment
 * @param segmentPos   index in segment at which matching starts
 * @param status       in/out error code
 * @return fillinResult (possibly via getEquivalents2()) on a match;
 *         NULL if there is no match or status indicates a failure
 */
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
    //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
    //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);

    if (U_FAILURE(status)) {
        return NULL;
    }

    UnicodeString temp(comp);
    int32_t inputLen=temp.length();
    UnicodeString decompString;
    nfd.normalize(temp, decompString, status);
    // Do not touch the decomposition buffer if normalization failed:
    // the U16_NEXT() below dereferences it unconditionally.
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (decompString.isBogus()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    const UChar *decomp=decompString.getBuffer();
    int32_t decompLen=decompString.length();

    // See if it matches the start of segment (at segmentPos)
    UBool ok = FALSE;
    UChar32 cp;
    int32_t decompPos = 0;
    UChar32 decompCp;
    U16_NEXT(decomp, decompPos, decompLen, decompCp);

    int32_t i = segmentPos;
    while(i < segLen) {
        U16_NEXT(segment, i, segLen, cp);

        if (cp == decompCp) { // if equal, eat another cp from decomp

            //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp))));

            if (decompPos == decompLen) { // done, have all decomp characters!
                temp.append(segment+i, segLen-i);
                ok = TRUE;
                break;
            }
            U16_NEXT(decomp, decompPos, decompLen, decompCp);
        } else {
            //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp))));

            // brute force approach
            temp.append(cp);

            /* TODO: optimize
            // since we know that the classes are monotonically increasing, after zero
            // e.g. 0 5 7 9 0 3
            // we can do an optimization
            // there are only a few cases that work: zero, less, same, greater
            // if both classes are the same, we fail
            // if the decomp class < the segment class, we fail

            segClass = getClass(cp);
            if (decompClass <= segClass) return null;
            */
        }
    }
    if (!ok)
        return NULL; // we failed, characters left over

    //if (PROGRESS) printf("Matches\n");

    // temp still holds only comp: the decomposition consumed the whole rest
    // of the segment, so the remainder is the empty string.
    if (inputLen == temp.length()) {
        fillinResult->put(UnicodeString(), new UnicodeString(), status);
        return fillinResult; // succeed, but no remainder
    }

    // brute force approach
    // check to make sure result is canonically equivalent
    UnicodeString trial;
    nfd.normalize(temp, trial, status);
    if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
        return NULL;
    }

    // Recurse on the part of temp that follows comp.
    return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

Просмотреть файл

@ -0,0 +1,98 @@
/*
**********************************************************************
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/chariter.h"
U_NAMESPACE_BEGIN
// Out-of-line destructor with an empty body.
ForwardCharacterIterator::~ForwardCharacterIterator()
{
}
// Default constructor: only default-constructs the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator() : UObject() {
}
// Copy constructor: passes the source object on to the UObject base.
ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {
}
// Default constructor: zero text length, and position/begin/end all at 0.
CharacterIterator::CharacterIterator()
    : textLength(0),
      pos(0),
      begin(0),
      end(0)
{
}
// Iterates over [0, length) starting at position 0.
// A negative length is treated as 0.
CharacterIterator::CharacterIterator(int32_t length)
    : textLength(length < 0 ? 0 : length),
      pos(0),
      begin(0),
      end(length < 0 ? 0 : length)
{
}
// Iterates over [0, length) starting at the given position.
// A negative length is treated as 0; the position is pinned into [0, end].
CharacterIterator::CharacterIterator(int32_t length, int32_t position)
    : textLength(length < 0 ? 0 : length),
      pos(position),
      begin(0),
      end(length < 0 ? 0 : length)
{
    // Clamp in the body: all members are initialized by now, whatever
    // their declaration order is.
    pos = (pos < 0) ? 0 : ((pos > end) ? end : pos);
}
// Iterates over [textBegin, textEnd) of a text with the given length,
// starting at position. Every argument is pinned into a consistent range:
//   textLength >= 0,  0 <= begin <= textLength,
//   begin <= end <= textLength,  begin <= pos <= end.
CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position)
    : textLength(length < 0 ? 0 : length),
      pos(position),
      begin(textBegin),
      end(textEnd)
{
    // Each clamp relies on the result of the previous one, so the order matters.
    begin = (begin < 0) ? 0 : ((begin > textLength) ? textLength : begin);
    end = (end < begin) ? begin : ((end > textLength) ? textLength : end);
    pos = (pos < begin) ? begin : ((pos > end) ? end : pos);
}
// Out-of-line destructor with an empty body.
CharacterIterator::~CharacterIterator()
{
}
// Copy constructor: copies the base part and the four index fields.
CharacterIterator::CharacterIterator(const CharacterIterator &that)
    : ForwardCharacterIterator(that),
      textLength(that.textLength),
      pos(that.pos),
      begin(that.begin),
      end(that.end)
{
}
// Assignment: assigns the base part, then copies the four index fields.
CharacterIterator &CharacterIterator::operator=(const CharacterIterator &that)
{
    ForwardCharacterIterator::operator=(that);

    textLength = that.textLength;
    begin = that.begin;
    end = that.end;
    pos = that.pos;

    return *this;
}
// Implementing first[32]PostInc() directly in a subclass should be faster,
// but these default implementations make subclassing a little easier.

// Resets the iterator via setToStart() and returns the result of nextPostInc().
UChar CharacterIterator::firstPostInc(void)
{
    setToStart();
    const UChar first = nextPostInc();
    return first;
}
// Resets the iterator via setToStart() and returns the result of next32PostInc().
UChar32 CharacterIterator::first32PostInc(void)
{
    setToStart();
    const UChar32 first = next32PostInc();
    return first;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,145 @@
/*
*******************************************************************************
* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: charstr.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010may19
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
// Replaces the contents of this string with a copy of s (including the
// terminating NUL). Does nothing if errorCode already indicates a failure,
// if s is this very object, or if the buffer cannot be grown.
CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || this==&s) {
        return *this;
    }
    if(!ensureCapacity(s.len+1, 0, errorCode)) {
        return *this;
    }
    len=s.len;
    uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1);
    return *this;
}
// Shortens the string to newLength bytes (negative values count as 0) and
// re-terminates it. A newLength that is not shorter than the current length
// leaves the string untouched.
CharString &CharString::truncate(int32_t newLength) {
    const int32_t target=(newLength<0) ? 0 : newLength;
    if(target<len) {
        len=target;
        buffer[len]=0;
    }
    return *this;
}
// Appends one byte plus a terminating NUL; needs room for len+2 bytes.
// Does nothing if ensureCapacity() reports failure.
CharString &CharString::append(char c, UErrorCode &errorCode) {
    if(!ensureCapacity(len+2, 0, errorCode)) {
        return *this;
    }
    buffer[len]=c;
    ++len;
    buffer[len]=0;
    return *this;
}
// Appends sLength bytes from s, or the NUL-terminated string s if sLength==-1.
// Sets U_ILLEGAL_ARGUMENT_ERROR for sLength<-1 or for a NULL s with a
// non-zero length.
// Three aliasing situations are distinguished:
//  - s is exactly the end of this string: the caller filled the buffer
//    obtained from getAppendBuffer(), so only the length is advanced;
//  - s points into this string and appending would require a reallocation:
//    the substring is copied first;
//  - otherwise the bytes are copied after growing the buffer as needed.
CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return *this;
    }
    if(sLength<-1 || (s==NULL && sLength!=0)) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if(sLength<0) {
        sLength=uprv_strlen(s);
    }
    if(sLength<=0) {
        return *this;  // nothing to append
    }

    const char *const start=buffer.getAlias();
    const int32_t spare=buffer.getCapacity()-len;  // includes the slot for the NUL

    if(s==(start+len)) {
        // The caller wrote into the getAppendBuffer().
        if(sLength>=spare) {
            // The caller wrote too much.
            errorCode=U_INTERNAL_PROGRAM_ERROR;
        } else {
            buffer[len+=sLength]=0;
        }
        return *this;
    }

    if(start<=s && s<(start+len) && sLength>=spare) {
        // (Part of) this string is appended to itself which requires reallocation,
        // so we have to make a copy of the substring and append that.
        return append(CharString(s, sLength, errorCode), errorCode);
    }

    if(ensureCapacity(len+sLength+1, 0, errorCode)) {
        // Re-fetch the buffer pointer: ensureCapacity() may have reallocated.
        uprv_memcpy(buffer.getAlias()+len, s, sLength);
        buffer[len+=sLength]=0;
    }
    return *this;
}
// Returns a pointer to the end of the string where at least minCapacity bytes
// (plus a terminating NUL) can be written, growing the buffer if needed.
// resultCapacity receives the number of writable bytes, not counting the NUL.
// On failure returns NULL and sets resultCapacity to 0.
char *CharString::getAppendBuffer(int32_t minCapacity,
                                  int32_t desiredCapacityHint,
                                  int32_t &resultCapacity,
                                  UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        int32_t available=buffer.getCapacity()-len-1;  // -1 for NUL
        if(available<minCapacity) {
            if(!ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) {
                resultCapacity=0;
                return NULL;
            }
            available=buffer.getCapacity()-len-1;
        }
        resultCapacity=available;
        return buffer.getAlias()+len;
    }
    resultCapacity=0;
    return NULL;
}
// Appends the contents of s, converted with the US_INV (invariant-character)
// extract mode, after making room for s.length() bytes plus the NUL.
CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
    if(!ensureCapacity(len+s.length()+1, 0, errorCode)) {
        return *this;
    }
    char *const dest=buffer.getAlias()+len;
    const int32_t room=buffer.getCapacity()-len;
    len+=s.extract(0, 0x7fffffff, dest, room, US_INV);
    return *this;
}
// Makes sure the buffer holds at least `capacity` bytes, keeping the current
// contents (len+1 bytes). First tries desiredCapacityHint if it is larger than
// capacity (a hint of 0 means capacity plus the current capacity), then falls
// back to exactly `capacity`.
// Returns FALSE if errorCode already indicates a failure, or sets
// U_MEMORY_ALLOCATION_ERROR and returns FALSE if no resize succeeds.
UBool CharString::ensureCapacity(int32_t capacity,
                                 int32_t desiredCapacityHint,
                                 UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    if(capacity<=buffer.getCapacity()) {
        return TRUE;  // already big enough
    }
    if(desiredCapacityHint==0) {
        desiredCapacityHint=capacity+buffer.getCapacity();
    }
    if(desiredCapacityHint>capacity && buffer.resize(desiredCapacityHint, len+1)!=NULL) {
        return TRUE;
    }
    if(buffer.resize(capacity, len+1)!=NULL) {
        return TRUE;
    }
    errorCode=U_MEMORY_ALLOCATION_ERROR;
    return FALSE;
}
// Appends a path component. If this string is non-empty and does not already
// end with U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR, a U_FILE_SEP_CHAR is
// inserted first. An empty s, or a prior failure, leaves the string unchanged.
CharString &CharString::appendPathPart(const StringPiece &s, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || s.length()==0) {
        return *this;
    }
    if(len>0) {
        const char last=buffer[len-1];
        if(last!=U_FILE_SEP_CHAR && last!=U_FILE_ALT_SEP_CHAR) {
            append(U_FILE_SEP_CHAR, errorCode);
        }
    }
    append(s, errorCode);
    return *this;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,130 @@
/*
**********************************************************************
* Copyright (c) 2001-2012, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/19/2001 aliu Creation.
* 05/19/2010 markus Rewritten from scratch
**********************************************************************
*/
#ifndef CHARSTRING_H
#define CHARSTRING_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "cmemory.h"
U_NAMESPACE_BEGIN
// Windows needs us to DLL-export the MaybeStackArray template specialization,
// but MacOS X cannot handle it. Same as in digitlst.h.
#if !U_PLATFORM_IS_DARWIN_BASED
template class U_COMMON_API MaybeStackArray<char, 40>;
#endif
/**
* ICU-internal char * string class.
* This class does not assume or enforce any particular character encoding.
* Raw bytes can be stored. The string object owns its characters.
* A terminating NUL is stored, but the class does not prevent embedded NUL characters.
*
* This class wants to be convenient but is also deliberately minimalist.
* Please do not add methods if they only add minor convenience.
* For example:
* cs.data()[5]='a'; // no need for setCharAt(5, 'a')
*/
class U_COMMON_API CharString : public UMemory {
public:
/** Constructs an empty, NUL-terminated string. */
CharString() : len(0) { buffer[0]=0; }
/** Constructs a string from the bytes of s; failures are reported in errorCode. */
CharString(const StringPiece &s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
/**
* Explicit copying constructor: unlike the (forbidden) standard copy
* constructor it takes a UErrorCode for reporting append() failures.
*/
CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, errorCode);
}
/** Constructs a string from sLength bytes at s; see append(const char *, int32_t, UErrorCode &). */
CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
buffer[0]=0;
append(s, sLength, errorCode);
}
~CharString() {}
/**
* Replaces this string's contents with the other string's contents.
* CharString does not support the standard copy constructor nor
* the assignment operator, to make copies explicit and to
* use a UErrorCode where memory allocations might be needed.
*/
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
/** @return TRUE if the string length is 0 */
UBool isEmpty() const { return len==0; }
/** @return the number of bytes, not counting the terminating NUL */
int32_t length() const { return len; }
/** Byte access by value. No index bounds check. */
char operator[](int32_t index) const { return buffer[index]; }
/** @return a StringPiece aliasing this string's buffer and length (no copy) */
StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
/** @return read-only pointer to the internal buffer */
const char *data() const { return buffer.getAlias(); }
/** @return writable pointer to the internal buffer */
char *data() { return buffer.getAlias(); }
/** Resets the length to 0 and NUL-terminates; the buffer itself is kept. */
CharString &clear() { len=0; buffer[0]=0; return *this; }
/** Shortens the string to newLength bytes; declared here, defined out of line. */
CharString &truncate(int32_t newLength);
/** Appends a single byte. */
CharString &append(char c, UErrorCode &errorCode);
/** Appends the bytes of s; forwards to the (pointer, length) overload. */
CharString &append(const StringPiece &s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
/** Appends another CharString; forwards to the (pointer, length) overload. */
CharString &append(const CharString &s, UErrorCode &errorCode) {
return append(s.data(), s.length(), errorCode);
}
/** Appends sLength bytes starting at s; all other append() overloads with a string argument end up here. */
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
* There will additionally be space for a terminating NUL right at resultCapacity.
* (This function is similar to ByteSink.GetAppendBuffer().)
*
* The returned buffer is only valid until the next write operation
* on this string.
*
* After writing at most resultCapacity bytes, call append() with the
* pointer returned from this function and the number of bytes written.
*
* @param minCapacity required minimum capacity of the returned buffer;
* must be non-negative
* @param desiredCapacityHint desired capacity of the returned buffer;
* must be non-negative
* @param resultCapacity will be set to the capacity of the returned buffer
* @param errorCode in/out error code
* @return a buffer with resultCapacity>=minCapacity
*/
char *getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,
UErrorCode &errorCode);
/** Appends the contents of a UnicodeString; declared here, defined out of line. */
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
/**
* Appends a filename/path part, e.g., a directory name.
* First appends a U_FILE_SEP_CHAR if necessary.
* Does nothing if s is empty.
*/
CharString &appendPathPart(const StringPiece &s, UErrorCode &errorCode);
private:
// Storage for the bytes: 40 chars are held inside the object itself.
MaybeStackArray<char, 40> buffer;
// Number of bytes in the string, not counting the terminating NUL.
int32_t len;
// Grows the buffer as needed; declared here, defined out of line.
UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode);
CharString(const CharString &other); // forbid copying of this class
CharString &operator=(const CharString &other); // forbid copying of this class
};
U_NAMESPACE_END
#endif
//eof

Просмотреть файл

@ -0,0 +1,183 @@
/*
******************************************************************************
*
* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File cmemory.c ICU Heap allocation.
* All ICU heap allocation, both for C and C++ new of ICU
* class types, comes through these functions.
*
* If you have a need to replace ICU allocation, this is the
* place to do it.
*
* Note that uprv_malloc(0) returns a non-NULL pointer, and
* that a subsequent free of that pointer value is a NOP.
*
******************************************************************************
*/
#include "unicode/uclean.h"
#include "cmemory.h"
#include "putilimp.h"
#include "uassert.h"
#include <stdlib.h>
/* uprv_malloc(0) returns a pointer to this read-only data. */
static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0};
/* Function Pointers for user-supplied heap functions */
static const void *pContext;
static UMemAllocFn *pAlloc;
static UMemReallocFn *pRealloc;
static UMemFreeFn *pFree;
/* Flag indicating whether any heap allocations have happened.
* Used to prevent changing out the heap functions after allocations have been made */
static UBool gHeapInUse;
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
static int n=0;
static long b=0;
#endif
#if U_DEBUG
static char gValidMemorySink = 0;
U_CAPI void uprv_checkValidMemory(const void *p, size_t n) {
    /*
     * Touch every one of the n bytes at p so that an invalid range is noticed.
     * The bytes are folded into a value that is loaded from and stored back to
     * a file-level variable, to try to keep the compiler from discarding the
     * whole loop.
     * A thread analyzer might complain about un-mutexed access to gValidMemorySink
     * which is true but harmless because no one ever uses the value in gValidMemorySink.
     */
    const char *cursor = (const char *)p;
    const char *limit;
    char folded = gValidMemorySink;
    U_ASSERT(p != NULL);
    for(limit = cursor + n; cursor != limit; ++cursor) {
        folded ^= *cursor;
    }
    gValidMemorySink = folded;
}
#endif /* U_DEBUG */
/*
 * Allocates s bytes through the user-supplied allocator if one was installed
 * with u_setMemoryFunctions(), otherwise through uprv_default_malloc().
 * A request for 0 bytes does not allocate: it returns the address of the
 * static zeroMem block, which uprv_free() and uprv_realloc() recognize.
 * Any non-zero request marks the heap as in use.
 */
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#if 1
    putchar('>');
    fflush(stdout);
#else
    /* n is an int, b is a long; size_t is cast to match %lu. */
    fprintf(stderr,"MALLOC\t#%d\t%lu bytes\t%ld total\n", ++n,(unsigned long)s,(b+=(long)s)); fflush(stderr);
#endif
#endif
    if (s > 0) {
        gHeapInUse = TRUE;
        if (pAlloc) {
            return (*pAlloc)(pContext, s);
        } else {
            return uprv_default_malloc(s);
        }
    } else {
        return (void *)zeroMem;
    }
}
/*
 * Resizes a block obtained from uprv_malloc()/uprv_realloc().
 *  - A buffer equal to zeroMem holds no heap memory: behave like uprv_malloc(size).
 *  - A size of 0 frees the buffer and returns zeroMem.
 *  - Otherwise the user-supplied or default realloc is called.
 */
U_CAPI void * U_EXPORT2
uprv_realloc(void * buffer, size_t size) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
    putchar('~');
    fflush(stdout);
#endif
    if (buffer == zeroMem) {
        return uprv_malloc(size);
    }

    if (size == 0) {
        if (pFree) {
            (*pFree)(pContext, buffer);
        } else {
            uprv_default_free(buffer);
        }
        return (void *)zeroMem;
    }

    gHeapInUse = TRUE;
    if (pRealloc) {
        return (*pRealloc)(pContext, buffer, size);
    }
    return uprv_default_realloc(buffer, size);
}
/*
 * Releases a block through the user-supplied or default free function.
 * The static zeroMem block handed out for zero-size requests is never
 * passed to a free function.
 */
U_CAPI void U_EXPORT2
uprv_free(void *buffer) {
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
    putchar('<');
    fflush(stdout);
#endif
    if (buffer == zeroMem) {
        return;
    }
    if (pFree) {
        (*pFree)(pContext, buffer);
    } else {
        uprv_default_free(buffer);
    }
}
/*
 * Allocates zero-initialized memory for num items of size bytes each.
 * Returns NULL if the allocation fails or if num*size does not fit in a
 * size_t (without this check the product would wrap around and a block
 * smaller than requested would be returned).
 */
U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) {
    void *mem = NULL;
    /* (size_t)-1 is the largest size_t value; reject products that overflow. */
    if (num != 0 && size > ((size_t)-1) / num) {
        return NULL;
    }
    size *= num;
    mem = uprv_malloc(size);
    if (mem) {
        uprv_memset(mem, 0, size);
    }
    return mem;
}
/*
 * Installs user-supplied allocation functions and their context pointer.
 * Does nothing if *status already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR if any function pointer is NULL, and
 * U_INVALID_STATE_ERROR if a heap allocation has already been made.
 */
U_CAPI void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    if (a==NULL || r==NULL || f==NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
    } else if (gHeapInUse) {
        *status = U_INVALID_STATE_ERROR;
    } else {
        pContext = context;
        pAlloc = a;
        pRealloc = r;
        pFree = f;
    }
}
/*
 * Forgets any user-supplied heap functions and their context, and clears
 * the heap-in-use flag. Always returns TRUE.
 */
U_CFUNC UBool cmemory_cleanup(void) {
    pAlloc = NULL;
    pRealloc = NULL;
    pFree = NULL;
    pContext = NULL;

    gHeapInUse = FALSE;
    return TRUE;
}
/*
 * cmemory_inUse
 * Return TRUE if ICU has allocated any memory, i.e. if gHeapInUse is set.
 * Used by u_setMutexFunctions() and similar to verify that ICU has not
 * been used, that it is in a pristine initial state.
 *
 * Defined with an explicit (void) parameter list to match the prototype
 * in cmemory.h and the definition of cmemory_cleanup().
 */
U_CFUNC UBool cmemory_inUse(void) {
    return gHeapInUse;
}

Просмотреть файл

@ -0,0 +1,599 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CMEMORY.H
*
* Contains stdlib.h/string.h memory functions
*
* @author Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 6/20/98 Bertrand Created.
* 05/03/99 stephen Changed from functions to macros.
*
******************************************************************************
*/
#ifndef CMEMORY_H
#define CMEMORY_H
#include "unicode/utypes.h"
#include <stddef.h>
#include <string.h>
#include "unicode/localpointer.h"
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif
#if U_DEBUG
/*
* The C++ standard requires that the source pointer for memcpy() & memmove()
* is valid, not NULL, and not at the end of an allocated memory block.
* In debug mode, we read one byte from the source pointer to verify that it's
* a valid, readable pointer.
* Note that the debug versions of the macros below evaluate src twice.
*/
U_CAPI void uprv_checkValidMemory(const void *p, size_t n);
#define uprv_memcpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size))
#define uprv_memmove(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size))
#else
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
#endif /* U_DEBUG */
#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size)
U_CAPI void * U_EXPORT2
uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1);
U_CAPI void * U_EXPORT2
uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2);
U_CAPI void U_EXPORT2
uprv_free(void *mem);
U_CAPI void * U_EXPORT2
uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2);
/**
* This should align the memory properly on any machine.
* This is very useful for the safeClone functions.
*/
typedef union {
long t1;
double t2;
void *t3;
} UAlignedMemory;
/**
* Get the least significant bits of a pointer (a memory address).
* For example, with a mask of 3, the macro gets the 2 least significant bits,
* which will be 0 if the pointer is 32-bit (4-byte) aligned.
*
* ptrdiff_t is the most appropriate integer type to cast to.
* size_t should work too, since on most (or all?) platforms it has the same
* width as ptrdiff_t.
*/
#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
/**
* Get the amount of bytes that a pointer is off by from
* the previous UAlignedMemory-aligned pointer.
*/
#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
/**
* Get the number of bytes to add to a pointer
* in order to get the next UAlignedMemory-aligned address.
* For a pointer that is already aligned the result is
* sizeof(UAlignedMemory), not 0.
*/
#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
/**
* Indicate whether the ICU allocation functions have been used.
* This is used to determine whether ICU is in an initial, unused state.
*/
U_CFUNC UBool
cmemory_inUse(void);
/**
* Heap clean up function, called from u_cleanup()
* Clears any user heap functions from u_setMemoryFunctions()
* Does NOT deallocate any remaining allocated memory.
*/
U_CFUNC UBool
cmemory_cleanup(void);
/**
* A function called by <TT>uhash_remove</TT>,
* <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
* an existing key or value.
* @param obj A key or value stored in a hashtable
* @see uprv_deleteUObject
*/
typedef void U_CALLCONV UObjectDeleter(void* obj);
/**
* Deleter for UObject instances.
* Works for all subclasses of UObject because it has a virtual destructor.
*/
U_CAPI void U_EXPORT2
uprv_deleteUObject(void *obj);
#ifdef __cplusplus
U_NAMESPACE_BEGIN
/**
 * "Smart pointer" class, deletes memory via uprv_free().
 * For most methods see the LocalPointerBase base class.
 * Adds operator[] for array item access.
 *
 * @see LocalPointerBase
 */
template<typename T>
class LocalMemory : public LocalPointerBase<T> {
public:
    /**
     * Constructor takes ownership.
     * @param p simple pointer to an array of T items that is adopted
     */
    explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}

    /**
     * Destructor releases the owned memory with uprv_free().
     */
    ~LocalMemory() {
        uprv_free(this->ptr);
    }

    /**
     * Frees the array currently owned,
     * and adopts (takes ownership of) the one passed in.
     * @param p simple pointer to an array of T items that is adopted
     */
    void adoptInstead(T *p) {
        uprv_free(this->ptr);
        this->ptr=p;
    }

    /**
     * Frees the array currently owned, allocates a new one and resets its bytes to 0.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
     * this method returns NULL.
     * @param newCapacity must be >0
     * @return the allocated array pointer, or NULL if the allocation failed
     */
    inline T *allocateInsteadAndReset(int32_t newCapacity=1);

    /**
     * Frees the array currently owned and allocates a new one, copying length T items.
     * Returns the new array pointer.
     * If the allocation fails, then the current array is unchanged and
     * this method returns NULL.
     * @param newCapacity must be >0
     * @param length number of T items to be copied from the old array to the new one;
     *               must be no more than the capacity of the old array,
     *               which the caller must track because the LocalMemory does not track it
     * @return the allocated array pointer, or NULL if the allocation failed
     */
    inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0);

    /**
     * Array item access (writable).
     * No index bounds check.
     * @param i array index
     * @return reference to the array item
     */
    T &operator[](ptrdiff_t i) const { return this->ptr[i]; }
};
// Allocates newCapacity zeroed T items and, only if that succeeds, frees the
// old array and takes the new one. Returns NULL (leaving the old array in
// place) if newCapacity<=0 or the allocation fails.
template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) {
    if(newCapacity<=0) {
        return NULL;
    }
    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
    if(fresh==NULL) {
        return NULL;
    }
    uprv_memset(fresh, 0, newCapacity*sizeof(T));
    uprv_free(this->ptr);
    this->ptr=fresh;
    return fresh;
}
// Allocates newCapacity T items, copies min(length, newCapacity) items from
// the old array into it and, only if the allocation succeeds, frees the old
// array and takes the new one. Returns NULL (leaving the old array in place)
// if newCapacity<=0 or the allocation fails.
template<typename T>
inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) {
    if(newCapacity<=0) {
        return NULL;
    }
    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
    if(fresh==NULL) {
        return NULL;
    }
    if(length>0) {
        const int32_t count=(length>newCapacity) ? newCapacity : length;
        uprv_memcpy(fresh, this->ptr, count*sizeof(T));
    }
    uprv_free(this->ptr);
    this->ptr=fresh;
    return fresh;
}
/**
* Simple array/buffer management class using uprv_malloc() and uprv_free().
* Provides an internal array with fixed capacity. Can alias another array
* or allocate one.
*
* The array address is properly aligned for type T. It might not be properly
* aligned for types larger than T (or larger than the largest subtype of T).
*
* Unlike LocalMemory and LocalArray, this class never adopts
* (takes ownership of) another array.
*/
template<typename T, int32_t stackCapacity>
class MaybeStackArray {
public:
/**
* Default constructor initializes with internal T[stackCapacity] buffer.
*/
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
/**
* Destructor deletes the array (if owned).
*/
~MaybeStackArray() { releaseArray(); }
/**
* Returns the array capacity (number of T items).
* @return array capacity
*/
int32_t getCapacity() const { return capacity; }
/**
* Access without ownership change.
* @return the array pointer
*/
T *getAlias() const { return ptr; }
/**
* Returns the array limit. Simple convenience method.
* @return getAlias()+getCapacity()
*/
T *getArrayLimit() const { return getAlias()+capacity; }
// No "operator T *() const" because that can make
// expressions like mbs[index] ambiguous for some compilers.
/**
* Array item access (const).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
const T &operator[](ptrdiff_t i) const { return ptr[i]; }
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
T &operator[](ptrdiff_t i) { return ptr[i]; }
/**
* Deletes the array (if owned) and aliases another one, no transfer of ownership.
* If the arguments are illegal, then the current array is unchanged.
* @param otherArray must not be NULL
* @param otherCapacity must be >0
*/
void aliasInstead(T *otherArray, int32_t otherCapacity) {
if(otherArray!=NULL && otherCapacity>0) {
releaseArray();
ptr=otherArray;
capacity=otherCapacity;
needToRelease=FALSE;
}
}
/**
* Deletes the array (if owned) and allocates a new one, copying length T items.
* Returns the new array pointer.
* If the allocation fails, then the current array is unchanged and
* this method returns NULL.
* @param newCapacity can be less than or greater than the current capacity;
* must be >0
* @param length number of T items to be copied from the old array to the new one
* @return the allocated array pointer, or NULL if the allocation failed
*/
inline T *resize(int32_t newCapacity, int32_t length=0);
/**
* Gives up ownership of the array if owned, or else clones it,
* copying length T items; resets itself to the internal stack array.
* Returns NULL if the allocation failed.
* @param length number of T items to copy when cloning,
* and capacity of the clone when cloning
* @param resultCapacity will be set to the returned array's capacity (output-only)
* @return the array pointer;
* caller becomes responsible for deleting the array
*/
inline T *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
// Current array: stackArray, an aliased array, or a uprv_malloc()ed block.
T *ptr;
// Number of T items available at ptr.
int32_t capacity;
// TRUE only when ptr was allocated by this object and must be uprv_free()d.
UBool needToRelease;
// Internal fixed-size storage used until resize()/aliasInstead() is called.
T stackArray[stackCapacity];
// Frees the current array if (and only if) this object allocated it.
// Does not reset ptr/capacity/needToRelease; callers do that.
void releaseArray() {
if(needToRelease) {
uprv_free(ptr);
}
}
/* No comparison operators with other MaybeStackArray's. */
bool operator==(const MaybeStackArray & /*other*/) {return FALSE;}
bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;}
/* No ownership transfer: No copy constructor, no assignment operator. */
// NOTE(review): the empty bodies below do not initialize or copy any member;
// they are private and exist only to make copying inaccessible.
MaybeStackArray(const MaybeStackArray & /*other*/) {}
void operator=(const MaybeStackArray & /*other*/) {}
// No heap allocation. Use only on the stack.
// (Declaring these functions private triggers a cascade of problems:
// MSVC insists on exporting an instantiation of MaybeStackArray, which
// requires that all functions be defined.
// An empty implementation of new() is rejected, it must return a value.
// Returning NULL is rejected by gcc for operator new.
// The expedient thing is just not to override operator new.
// While relatively pointless, heap allocated instances will function.)
// static void * U_EXPORT2 operator new(size_t size);
// static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
// static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
// Allocates a new array of newCapacity items, copies up to length items
// (limited by both the old and the new capacity) from the current array,
// then releases the old array if it was owned and switches to the new one.
// Returns NULL and changes nothing if newCapacity<=0 or the allocation fails.
template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) {
    if(newCapacity<=0) {
        return NULL;
    }
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
    ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T));
#endif
    T *fresh=(T *)uprv_malloc(newCapacity*sizeof(T));
    if(fresh==NULL) {
        return NULL;
    }
    if(length>0) {
        if(length>capacity) {
            length=capacity;
        }
        if(length>newCapacity) {
            length=newCapacity;
        }
        uprv_memcpy(fresh, ptr, length*sizeof(T));
    }
    releaseArray();
    ptr=fresh;
    capacity=newCapacity;
    needToRelease=TRUE;
    return fresh;
}
// Hands the array over to the caller: the owned heap array itself if there is
// one, otherwise a freshly allocated copy of the first length items (length
// is limited to the current capacity). Afterwards this object points at its
// internal stack array again.
// Returns NULL, leaving the object unchanged, if a clone is needed and
// length<=0 or the allocation fails.
// Note: resultCapacity is set to length in both cases; when an owned array is
// handed over, length is passed through unmodified.
template<typename T, int32_t stackCapacity>
inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) {
    T *result=ptr;
    if(!needToRelease) {
        // Not owned: the caller gets a copy instead.
        if(length<=0) {
            return NULL;
        }
        if(length>capacity) {
            length=capacity;
        }
        result=(T *)uprv_malloc(length*sizeof(T));
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
        ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T));
#endif
        if(result==NULL) {
            return NULL;
        }
        uprv_memcpy(result, ptr, length*sizeof(T));
    }
    resultCapacity=length;
    // Fall back to the internal storage.
    ptr=stackArray;
    capacity=stackCapacity;
    needToRelease=FALSE;
    return result;
}
/**
* Variant of MaybeStackArray that allocates a header struct and an array
* in one contiguous memory block, using uprv_malloc() and uprv_free().
* Provides internal memory with fixed array capacity. Can alias another memory
* block or allocate one.
* The stackCapacity is the number of T items in the internal memory,
* not counting the H header.
* Unlike LocalMemory and LocalArray, this class never adopts
* (takes ownership of) another memory block.
*/
template<typename H, typename T, int32_t stackCapacity>
class MaybeStackHeaderAndArray {
public:
/**
* Default constructor initializes with internal H+T[stackCapacity] buffer.
*/
MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {}
/**
* Destructor deletes the memory (if owned).
*/
~MaybeStackHeaderAndArray() { releaseMemory(); }
/**
* Returns the array capacity (number of T items).
* @return array capacity
*/
int32_t getCapacity() const { return capacity; }
/**
* Access without ownership change.
* @return the header pointer
*/
H *getAlias() const { return ptr; }
/**
* Returns the array start.
* @return array start, same address as getAlias()+1
*/
T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); }
/**
* Returns the array limit.
* @return array limit (getArrayStart() + capacity)
*/
T *getArrayLimit() const { return getArrayStart()+capacity; }
/**
* Access without ownership change. Same as getAlias().
* A class instance can be used directly in expressions that take an H *.
* @return the header pointer
*/
operator H *() const { return ptr; }
/**
* Array item access (writable).
* No index bounds check.
* @param i array index
* @return reference to the array item
*/
T &operator[](ptrdiff_t i) { return getArrayStart()[i]; }
/**
* Deletes the memory block (if owned) and aliases another one, no transfer of ownership.
* If the arguments are illegal, then the current memory is unchanged.
* @param otherMemory must not be NULL
* @param otherCapacity must be >0
*/
void aliasInstead(H *otherMemory, int32_t otherCapacity) {
if(otherMemory!=NULL && otherCapacity>0) {
releaseMemory();
ptr=otherMemory;
capacity=otherCapacity;
needToRelease=FALSE;
}
}
/**
* Deletes the memory block (if owned) and allocates a new one,
* copying the header and length T array items.
* Returns the new header pointer.
* If the allocation fails, then the current memory is unchanged and
* this method returns NULL.
* @param newCapacity can be less than or greater than the current capacity;
* must be >0
* @param length number of T items to be copied from the old array to the new one
* @return the allocated pointer, or NULL if the allocation failed
*/
inline H *resize(int32_t newCapacity, int32_t length=0);
/**
* Gives up ownership of the memory if owned, or else clones it,
* copying the header and length T array items; resets itself to the internal memory.
* Returns NULL if the allocation failed.
* @param length number of T items to copy when cloning,
* and array capacity of the clone when cloning
* @param resultCapacity will be set to the returned array's capacity (output-only)
* @return the header pointer;
* caller becomes responsible for deleting the array
*/
inline H *orphanOrClone(int32_t length, int32_t &resultCapacity);
private:
// Current header pointer: &stackHeader, an aliased block, or an owned heap block.
H *ptr;
// Number of T items that follow the header at ptr.
int32_t capacity;
// TRUE when ptr must be passed to uprv_free() by releaseMemory().
UBool needToRelease;
// stackHeader must precede stackArray immediately.
H stackHeader;
T stackArray[stackCapacity];
// Frees the current block, but only if this object owns it.
void releaseMemory() {
if(needToRelease) {
uprv_free(ptr);
}
}
/* No comparison operators with other MaybeStackHeaderAndArray's. */
bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;}
bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;}
/* No ownership transfer: No copy constructor, no assignment operator. */
MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {}
void operator=(const MaybeStackHeaderAndArray & /*other*/) {}
// No heap allocation. Use only on the stack.
// (Declaring these functions private triggers a cascade of problems;
// see the MaybeStackArray class for details.)
// static void * U_EXPORT2 operator new(size_t size);
// static void * U_EXPORT2 operator new[](size_t size);
#if U_HAVE_PLACEMENT_NEW
// static void * U_EXPORT2 operator new(size_t, void *ptr);
#endif
};
/*
 * Allocates a new contiguous header+array block with room for newCapacity
 * T items, copies the header plus up to length items from the current block,
 * releases the old block if it was owned, and takes ownership of the new one.
 *
 * @param newCapacity number of T items in the new block; negative values are rejected
 * @param length      number of T items to copy; clamped to [0, min(capacity, newCapacity)]
 * @return the new header pointer, or NULL (object unchanged) if newCapacity<0
 *         or the allocation failed
 */
template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity,
                                                                int32_t length) {
    if(newCapacity<0) {
        return NULL;
    }
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
    // sizeof() yields size_t: print it via an explicit unsigned long cast with %lu.
    // (The previous "%d"/"%ul" specifiers did not match the argument types.)
    ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %lu + %d * %lu\n",
              (unsigned long)sizeof(H), (int)newCapacity, (unsigned long)sizeof(T));
#endif
    H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T));
    if(p==NULL) {
        return NULL;
    }
    if(length<0) {
        length=0;
    } else if(length>0) {
        // Copy no more items than the old block holds or the new block can take.
        if(length>capacity) {
            length=capacity;
        }
        if(length>newCapacity) {
            length=newCapacity;
        }
    }
    // The header is always copied, even when no array items are.
    uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
    releaseMemory();
    ptr=p;
    capacity=newCapacity;
    needToRelease=TRUE;
    return p;
}
/*
 * Gives the header+array block to the caller: the owned heap block itself if
 * there is one, otherwise a uprv_malloc()ed clone containing the header and
 * the first length T items (length is clamped to [0, capacity]).
 * On success this object is reset to its internal memory.
 *
 * @param length         number of T items to copy (and capacity of the clone) when cloning
 * @param resultCapacity set to length (after clamping in the clone case)
 * @return the header pointer now owned by the caller, or NULL (object unchanged)
 *         if a clone was needed and the allocation failed
 */
template<typename H, typename T, int32_t stackCapacity>
inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length,
                                                                       int32_t &resultCapacity) {
    H *p;
    if(needToRelease) {
        p=ptr;
    } else {
        if(length<0) {
            length=0;
        } else if(length>capacity) {
            length=capacity;
        }
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
        // sizeof() yields size_t: print it via an explicit unsigned long cast with %lu.
        // (The previous "%ul" was "%u" followed by a literal 'l'.)
        ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %lu + %d * %lu\n",
                  (unsigned long)sizeof(H), (int)length, (unsigned long)sizeof(T));
#endif
        p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T));
        if(p==NULL) {
            return NULL;
        }
        uprv_memcpy(p, ptr, sizeof(H)+length*sizeof(T));
    }
    resultCapacity=length;
    // Fall back to the internal buffer; the caller now owns p.
    ptr=&stackHeader;
    capacity=stackCapacity;
    needToRelease=FALSE;
    return p;
}
U_NAMESPACE_END
#endif /* __cplusplus */
#endif /* CMEMORY_H */

Просмотреть файл

@ -0,0 +1,108 @@
// Do not edit with Microsoft Developer Studio Resource Editor.
// It will permanently substitute version numbers that are intended to be
// picked up by the pre-processor during each build.
// Copyright (c) 2001-2010 International Business Machines
// Corporation and others. All Rights Reserved.
//
#include "msvcres.h"
#define APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 2 resource.
//
#include <winresrc.h>
/////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
//
LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL
#pragma code_page(1252)
#ifdef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// TEXTINCLUDE
//
1 TEXTINCLUDE
BEGIN
"msvcres.h\0"
END
2 TEXTINCLUDE
BEGIN
"#include <winresrc.h>\0"
END
3 TEXTINCLUDE
BEGIN
"\r\n"
"\0"
END
#endif // APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
#define STR(s) #s
#define CommaVersionString(a, b, c, d) STR(a) ", " STR(b) ", " STR(c) ", " STR(d) "\0"
VS_VERSION_INFO VERSIONINFO
FILEVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
PRODUCTVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
#else
FILEFLAGS 0x0L
#endif
FILEOS VOS__WINDOWS32
FILETYPE VFT_DLL
FILESUBTYPE 0x0L
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "00000000"
BEGIN
VALUE "Comments", ICU_WEBSITE "\0"
VALUE "CompanyName", ICU_COMPANY "\0"
VALUE "FileDescription", ICU_PRODUCT_PREFIX " Common DLL\0"
VALUE "FileVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
VALUE "LegalCopyright", U_COPYRIGHT_STRING "\0"
#ifdef _DEBUG
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT "d.dll\0"
#else
VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT ".dll\0"
#endif
VALUE "PrivateBuild", "\0"
VALUE "ProductName", ICU_PRODUCT "\0"
VALUE "ProductVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM)
VALUE "SpecialBuild", "\0"
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x000, 0000
END
END
/////////////////////////////////////////////////////////////////////////////
#ifndef APSTUDIO_INVOKED
/////////////////////////////////////////////////////////////////////////////
//
// Generated from the TEXTINCLUDE 3 resource.
//
/////////////////////////////////////////////////////////////////////////////
#endif // not APSTUDIO_INVOKED

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,95 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cpputils.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*/
#ifndef CPPUTILS_H
#define CPPUTILS_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "cmemory.h"
/*==========================================================================*/
/* Array copy utility functions */
/*==========================================================================*/
static
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
double* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
int8_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
int16_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
static
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
int32_t* dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
static
inline void
uprv_arrayCopy(const UChar *src, int32_t srcStart,
UChar *dst, int32_t dstStart, int32_t count)
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
/**
 * Copy an array of UnicodeString OBJECTS (not pointers), one assignment per item.
 * Does nothing when count is not positive.
 * @internal
 */
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count) {
    for(int32_t i = 0; i < count; ++i) {
        dst[i] = src[i];
    }
}

/**
 * Copy count UnicodeString OBJECTS (not pointers) from src[srcStart...]
 * to dst[dstStart...].
 * @internal
 */
static inline void
uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart,
               icu::UnicodeString *dst, int32_t dstStart, int32_t count) {
    const icu::UnicodeString *from = src + srcStart;
    icu::UnicodeString *to = dst + dstStart;
    uprv_arrayCopy(from, to, count);
}
/**
 * Checks that the string is usable before a buffer is requested from it.
 * Sets errorCode to U_ILLEGAL_ARGUMENT_ERROR if s.isBogus(); leaves an
 * already-failed errorCode untouched.
 * NOTE(review): the previous comment also mentioned "has an open getBuffer()";
 * only isBogus() is tested here -- confirm whether isBogus() covers that state.
 */
inline void
uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) {
    if(!U_SUCCESS(errorCode)) {
        return;
    }
    if(s.isBogus()) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    }
}
#endif /* CPPUTILS_H */

Просмотреть файл

@ -0,0 +1,339 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/18/98 hshih Created
* 09/08/98 stephen Added include for ctype, for Mac Port
* 11/15/99 helena Integrated S/390 IEEE changes.
******************************************************************************
*/
#include <stdlib.h>
#include <stdio.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
/*
* We hardcode case conversion for invariant characters to match our expectation
* and the compiler execution charset.
* This prevents problems on systems
* - with non-default casing behavior, like Turkish system locales where
* tolower('I') maps to dotless i and toupper('i') maps to dotted I
* - where there are no lowercase Latin characters at all, or using different
* codes (some old EBCDIC codepages)
*
* This works because the compiler usually runs on a platform where the execution
* charset includes all of the invariant characters at their expected
* code positions, so that the char * string literals in ICU code match
* the char literals here.
*
* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
* and the set of uppercase Latin letters is discontiguous as well.
*/
/*
 * Returns non-zero if c is one of the invariant Latin letters a-z or A-Z.
 * On EBCDIC the letters are tested in their three contiguous runs each.
 */
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c) {
    int lower, upper;
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    lower = ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z');
    upper = ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
#else
    lower = ('a'<=c && c<='z');
    upper = ('A'<=c && c<='Z');
#endif
    return lower || upper;
}
/*
 * Uppercases c if it is an invariant lowercase Latin letter; any other
 * character is returned unchanged. Locale-independent.
 */
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
    int isLower;
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC lowercase letters live in three separate runs. */
    isLower = ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z');
#else
    isLower = ('a'<=c && c<='z');
#endif
    return isLower ? (char)(c+('A'-'a')) : c;
}
#if 0
/*
* Commented out because cstring.h defines uprv_tolower() to be
* the same as either uprv_asciitolower() or uprv_ebcdictolower()
* to reduce the amount of code to cover with tests.
*
* Note that this uprv_tolower() definition is likely to work for most
* charset families, not just ASCII and EBCDIC, because its #else branch
* is written generically.
*/
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
c=(char)(c+('a'-'A'));
}
#else
if('A'<=c && c<='Z') {
c=(char)(c+('a'-'A'));
}
#endif
return c;
}
#endif
/*
 * Lowercases c if its code is in 0x41..0x5a (ASCII 'A'..'Z') by adding 0x20;
 * any other value is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    return (0x41<=c && c<=0x5a) ? (char)(c+0x20) : c;
}
/*
 * Lowercases c if its unsigned code lies in one of the ranges
 * 0xc1..0xc9, 0xd1..0xd9 or 0xe2..0xe9 by subtracting 0x40;
 * any other value is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    const uint8_t b = (uint8_t)c;
    const int isUpper =
        (0xc1<=b && b<=0xc9) ||
        (0xd1<=b && b<=0xd9) ||
        (0xe2<=b && b<=0xe9);
    if(isUpper) {
        return (char)(c-0x40);
    }
    return c;
}
/*
 * Lowercases the NUL-terminated string in place with uprv_tolower().
 * A NULL str is accepted and returned as is.
 * Returns the original str pointer.
 */
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
    char *cursor = str;

    if (cursor != NULL) {
        /* The terminator itself is also passed through uprv_tolower() and rewritten. */
        for (;;) {
            *cursor = (char)uprv_tolower(*cursor);
            if (*cursor++ == 0) {
                break;
            }
        }
    }
    return str;
}
/*
 * Uppercases the NUL-terminated string in place with uprv_toupper().
 * A NULL str is accepted and returned as is.
 * Returns the original str pointer.
 */
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
    char *cursor = str;

    if (cursor != NULL) {
        /* The terminator itself is also passed through uprv_toupper() and rewritten. */
        for (;;) {
            *cursor = (char)uprv_toupper(*cursor);
            if (*cursor++ == 0) {
                break;
            }
        }
    }
    return str;
}
/*
 * Takes an int32_t and fills in a char* string with that number "radix"-based
 * (radix must be in 2..16).
 * Only in radix 10 is the value treated as signed: negative values get a
 * leading '-'. In every other radix the unsigned 32-bit pattern is written.
 * Writes at most 12 chars in radix 10 ("-2147483648" plus NUL) and at most
 * 33 chars overall (32 binary digits plus NUL); the caller's buffer must be
 * large enough for the radix used.
 * Returns the length of the string (not including the NUL).
 */
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
    /* Worst case is radix 2: 32 digits plus the terminating NUL.
     * (The former 30-byte buffer was underrun for large values in radix 2.) */
    char      tbuf[33];
    int32_t   tbx;
    uint8_t   digit;
    int32_t   length = 0;
    uint32_t  uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint32_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT32_MIN. */
        uval = (uint32_t)0 - uval;
        buffer[length++] = '-';
    }

    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}
/*
 * Takes an int64_t and fills in a char* string with that number "radix"-based
 * (radix must be in 2..16).
 * Only in radix 10 is the value treated as signed: negative values get a
 * leading '-'. In every other radix the unsigned 64-bit pattern is written.
 * Writes at most 21 chars in radix 10 ("-9223372036854775808" plus NUL) and
 * at most 65 chars overall (64 binary digits plus NUL); the caller's buffer
 * must be large enough for the radix used.
 * Returns the length of the string, not including the terminating NUL.
 */
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
    /* Worst case is radix 2: 64 digits plus the terminating NUL.
     * (The former 30-byte buffer was underrun for large values in radix 2..4.) */
    char      tbuf[65];
    int32_t   tbx;
    uint8_t   digit;
    int32_t   length = 0;
    uint64_t  uval;

    U_ASSERT(radix>=2 && radix<=16);
    uval = (uint64_t) v;
    if(v<0 && radix == 10) {
        /* Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT64_MIN. */
        uval = (uint64_t)0 - uval;
        buffer[length++] = '-';
    }

    tbx = (int32_t)sizeof(tbuf)-1;
    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}
/*
 * Parses integerString in the given radix with uprv_strtoul() and returns
 * the result converted to int32_t. The end-of-parse position is not reported.
 */
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *parseEnd;
    unsigned long parsed = uprv_strtoul(integerString, &parseEnd, radix);
    return (int32_t)parsed;
}
/*
 * Compares two strings after lowercasing each character with uprv_tolower().
 * A NULL pointer sorts before any non-NULL string; two NULLs are equal.
 * Returns 0 if equal, -1/1 when one string is NULL or a proper prefix of
 * the other, otherwise the difference of the first mismatching lowercased
 * characters (as unsigned values).
 */
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2) {
    if(str1==NULL) {
        return (str2==NULL) ? 0 : -1;
    }
    if(str2==NULL) {
        return 1;
    }

    for(;; ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0) {
            /* str1 ended: equal if str2 ended too, otherwise str1 is shorter */
            return (right==0) ? 0 : -1;
        }
        if(right==0) {
            return 1;
        }
        /* both characters are non-zero: compare them lowercased */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }
}
/*
 * Like uprv_stricmp(), but examines at most n characters.
 * A NULL pointer sorts before any non-NULL string; two NULLs are equal.
 * Returns 0 if the first n characters (or the whole strings, if shorter)
 * are equal after lowercasing with uprv_tolower().
 */
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
    uint32_t remaining;

    if(str1==NULL) {
        return (str2==NULL) ? 0 : -1;
    }
    if(str2==NULL) {
        return 1;
    }

    for(remaining=n; remaining!=0; --remaining, ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0) {
            /* str1 ended: equal if str2 ended too, otherwise str1 is shorter */
            return (right==0) ? 0 : -1;
        }
        if(right==0) {
            return 1;
        }
        /* both characters are non-zero: compare them lowercased */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }
    /* no difference within the first n characters */
    return 0;
}
/*
 * Returns a uprv_malloc()ed copy of the NUL-terminated string src
 * (including its terminator), or NULL if the allocation fails.
 * src is not checked for NULL.
 */
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
    const size_t numBytes = uprv_strlen(src) + 1;
    char *copy = (char *) uprv_malloc(numBytes);
    if (copy == NULL) {
        return NULL;
    }
    uprv_memcpy(copy, src, numBytes);
    return copy;
}
/*
 * Returns a uprv_malloc()ed string holding the first n bytes of src followed
 * by a NUL. A negative n means "copy the whole string" via uprv_strdup().
 * Returns NULL if the allocation fails.
 */
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
    char *copy;

    if(n < 0) {
        return uprv_strdup(src);
    }
    copy = (char*)uprv_malloc(n+1);
    if (copy != NULL) {
        uprv_memcpy(copy, src, n);
        copy[n] = 0;
    }
    return copy;
}

Просмотреть файл

@ -0,0 +1,140 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.H
*
* Contains CString interface
*
* @author Helena Shih
*
* Modification History:
*
* Date Name Description
* 6/17/98 hshih Created.
* 05/03/99 stephen Changed from functions to macros.
* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower
*
******************************************************************************
*/
#ifndef CSTRING_H
#define CSTRING_H 1
#include "unicode/utypes.h"
#include "cmemory.h"
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src)
#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str)
#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2)
#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src)
#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c)
#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c)
#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c)
#if U_DEBUG
#define uprv_strncpy(dst, src, size) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size))
#define uprv_strncmp(s1, s2, n) ( \
uprv_checkValidMemory(s1, 1), \
uprv_checkValidMemory(s2, 1), \
U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n))
#define uprv_strncat(dst, src, n) ( \
uprv_checkValidMemory(src, 1), \
U_STANDARD_CPP_NAMESPACE strncat(dst, src, n))
#else
#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size)
#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n)
#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n)
#endif /* U_DEBUG */
/**
* Is c an ASCII-repertoire letter a-z or A-Z?
* Note: The implementation is specific to whether ICU is compiled for
* an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this.
*/
U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c);
U_CAPI char U_EXPORT2
uprv_toupper(char c);
U_CAPI char U_EXPORT2
uprv_asciitolower(char c);
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_tolower uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_tolower uprv_ebcdictolower
#else
# error U_CHARSET_FAMILY is not valid
#endif
#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end)
#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base)
#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base)
/* Conversion from a digit to the character with radix base from 2-19 */
/* May need to use U_UPPER_ORDINAL*/
#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10))
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src);
/**
* uprv_malloc n+1 bytes, and copy n bytes from src into the new string.
* Terminate with a null at offset n. If n is -1, works like uprv_strdup
* @param src
* @param n length of the input string, not including null.
* @return new string (owned by caller, use uprv_free to free).
* @internal
*/
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n);
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str);
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str);
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char *buffer, int32_t n, int32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix);
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix);
/**
* Case-insensitive, language-independent string comparison
* limited to the ASCII character repertoire.
*/
U_CAPI int U_EXPORT2
uprv_stricmp(const char *str1, const char *str2);
/**
* Case-insensitive, language-independent string comparison
* limited to the ASCII character repertoire.
*/
U_CAPI int U_EXPORT2
uprv_strnicmp(const char *str1, const char *str2, uint32_t n);
#endif /* ! CSTRING_H */

Просмотреть файл

@ -0,0 +1,53 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !U_HAVE_WCSCPY
#include "cwchar.h"
/* Appends src (including its terminator) to the end of dst; returns dst. */
U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) {
    wchar_t *out=dst;

    /* skip to the terminator of the existing contents */
    while(*out!=0) {
        ++out;
    }
    /* copy src, stopping after its terminator has been written */
    for(;;) {
        *out=*src;
        if(*out==0) {
            break;
        }
        ++out;
        ++src;
    }
    return dst;
}
/* Copies src (including its terminator) to dst; returns dst. */
U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) {
    wchar_t *out=dst;

    for(;;) {
        *out=*src;
        if(*out==0) {
            break;
        }
        ++out;
        ++src;
    }
    return dst;
}
/* Returns the number of wchar_t units in src before its terminator. */
U_CAPI size_t uprv_wcslen(const wchar_t *src) {
    const wchar_t *end;

    for(end=src; *end!=0; ++end) {
        /* just scanning for the terminator */
    }
    return end-src;
}
#endif

Просмотреть файл

@ -0,0 +1,56 @@
/*
******************************************************************************
*
* Copyright (C) 2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: cwchar.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2001may25
* created by: Markus W. Scherer
*
* This file contains ICU-internal definitions of wchar_t operations.
* These definitions were moved here from cstring.h so that fewer
* ICU implementation files include wchar.h.
*/
#ifndef __CWCHAR_H__
#define __CWCHAR_H__
#include <string.h>
#include <stdlib.h>
#include "unicode/utypes.h"
/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */
#if U_HAVE_WCHAR_H
# include <wchar.h>
#endif
/*===========================================================================*/
/* Wide-character functions */
/*===========================================================================*/
/* The following are not available on all systems, defined in wchar.h or string.h. */
#if U_HAVE_WCSCPY
# define uprv_wcscpy wcscpy
# define uprv_wcscat wcscat
# define uprv_wcslen wcslen
#else
U_CAPI wchar_t* U_EXPORT2
uprv_wcscpy(wchar_t *dst, const wchar_t *src);
U_CAPI wchar_t* U_EXPORT2
uprv_wcscat(wchar_t *dst, const wchar_t *src);
U_CAPI size_t U_EXPORT2
uprv_wcslen(const wchar_t *src);
#endif
/* The following are part of the ANSI C standard, defined in stdlib.h . */
#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count)
#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count)
#endif

Просмотреть файл

@ -0,0 +1,942 @@
/**
*******************************************************************************
* Copyright (C) 2006-2012, International Business Machines Corporation
* and others. All Rights Reserved.
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "dictbe.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
#include "unicode/ubrk.h"
#include "uvector.h"
#include "uassert.h"
#include "unicode/normlzr.h"
#include "cmemory.h"
#include "dictionarydata.h"
U_NAMESPACE_BEGIN
/*
******************************************************************
*/
// Remembers the bit set of break types this engine handles
// (bit n set means break type n is handled; see handles()).
DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes)
    : fTypes(breakTypes) {
}
// Destructor: nothing to release explicitly here.
DictionaryBreakEngine::~DictionaryBreakEngine() {
}
// Returns TRUE if breakType (0..31) is one of the types given to the
// constructor and c is in the character set installed by setCharacters().
UBool
DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
    if (breakType < 0 || breakType >= 32) {
        return FALSE;
    }
    const uint32_t typeBit = (uint32_t)1 << breakType;
    if ((typeBit & fTypes) == 0) {
        return FALSE;
    }
    return fSet.contains(c) ? TRUE : FALSE;
}
// Determines the run of characters around the current text position that
// belong to fSet, and, if breakType is one this engine handles, passes that
// range to divideUpDictionaryRange() to collect breaks in foundBreaks.
// Returns the value from divideUpDictionaryRange(), or 0 if breakType is
// not handled (in which case the text index is left wherever the scan ended).
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const {
int32_t result = 0;
// Find the span of characters included in the set.
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
if (reverse) {
// Scan backwards while the characters are in the set and startPos is not reached.
UBool isDict = fSet.contains(c);
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
c = utext_previous32(text);
isDict = fSet.contains(c);
}
// If the scan stopped on a character outside the set, the range begins one
// past it (presumably assumes that character occupies one native index unit).
rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1);
rangeEnd = start + 1;
}
else {
// Scan forwards while the characters are in the set and endPos is not reached.
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
}
// Same break-type test as in handles(): bit breakType must be set in fTypes.
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
// Restore the text position to where the scan above stopped.
utext_setNativeIndex(text, current);
}
return result;
}
// Installs a copy of the given set as the characters this engine handles
// (the set consulted by handles() and findBreaks()).
void
DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) {
fSet = set;
// Compact for caching
fSet.compact();
}
/*
******************************************************************
*/
// Helper class for improving readability of the Thai word break
// algorithm. The implementation is completely inline.
// List size, limited by the maximum number of words in the dictionary
// that form a nested sequence.
#define POSSIBLE_WORD_LIST_MAX 20
// Holds the dictionary-word candidates found at one text offset, together
// with cursors for stepping through them from longest to shortest.
class PossibleWord {
private:
// list of word candidate lengths, in increasing length order
int32_t lengths[POSSIBLE_WORD_LIST_MAX];
int32_t count; // Count of candidates
int32_t prefix; // The longest match with a dictionary word (value returned by dict->matches())
int32_t offset; // Offset in the text of these candidates; -1 until candidates() is first called
int mark; // Index into lengths[] of the preferred candidate
int current; // Index into lengths[] of the candidate we're currently looking at
public:
PossibleWord();
~PossibleWord();
// Fill the list of candidates if needed, select the longest, and return the number found
int candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
// Select the currently marked candidate, point after it in the text, and invalidate self
int32_t acceptMarked( UText *text );
// Back up from the current candidate to the next shorter one; return TRUE if that exists
// and point the text after it
UBool backUp( UText *text );
// Return the longest prefix this candidate location shares with a dictionary word
int32_t longestPrefix();
// Mark the current candidate as the one we like
void markCurrent();
};
// Starts with offset -1 so that the first candidates() call fills the list
// (assuming text indexes are never negative). The other members are left
// unset until candidates() runs.
inline
PossibleWord::PossibleWord()
    : offset(-1) {
}
// Destructor: no owned resources to release.
inline
PossibleWord::~PossibleWord() {
}
// Looks up the dictionary words starting at the current text position
// (re-using the cached result if the position equals the cached offset),
// positions the text after the longest candidate, selects that candidate as
// both current and marked, and returns the number of candidates.
inline int
PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
    // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
    const int32_t textPos = (int32_t)utext_getNativeIndex(text);
    if (textPos != offset) {
        const int32_t maxCandidates = sizeof(lengths)/sizeof(lengths[0]);
        offset = textPos;
        prefix = dict->matches(text, rangeEnd-textPos, lengths, count, maxCandidates);
        // Dictionary leaves text after longest prefix, not longest word. Back up.
        if (count <= 0) {
            utext_setNativeIndex(text, textPos);
        }
    }
    if (count > 0) {
        // lengths[] is in increasing order, so the last entry is the longest word.
        utext_setNativeIndex(text, textPos+lengths[count-1]);
    }
    current = count-1;
    mark = current;
    return count;
}
// Positions the text just after the marked candidate and returns its length.
inline int32_t
PossibleWord::acceptMarked( UText *text ) {
    const int32_t acceptedLength = lengths[mark];
    utext_setNativeIndex(text, offset + acceptedLength);
    return acceptedLength;
}
// Steps to the next shorter candidate, if there is one, and positions the
// text just after it. Returns FALSE (changing nothing) when the current
// candidate is already the shortest.
inline UBool
PossibleWord::backUp( UText *text ) {
    if (current <= 0) {
        return FALSE;
    }
    --current;
    utext_setNativeIndex(text, offset + lengths[current]);
    return TRUE;
}
// Returns the prefix value stored by the last candidates() lookup
// (the result of dict->matches()).
inline int32_t
PossibleWord::longestPrefix() {
return prefix;
}
// Remembers the candidate currently being looked at as the preferred one;
// acceptMarked() will later select it.
inline void
PossibleWord::markCurrent() {
mark = current;
}
// How many words in a row are "good enough"?
#define THAI_LOOKAHEAD 3
// Will not combine a non-word with a preceding dictionary word longer than this
#define THAI_ROOT_COMBINE_THRESHOLD 3
// Will not combine a non-word that shares at least this much prefix with a
// dictionary word, with a preceding word
#define THAI_PREFIX_COMBINE_THRESHOLD 3
// Elision character
#define THAI_PAIYANNOI 0x0E2F
// Repeat character
#define THAI_MAIYAMOK 0x0E46
// Minimum word size
#define THAI_MIN_WORD 2
// Minimum number of characters for two words
#define THAI_MIN_WORD_SPAN (THAI_MIN_WORD * 2)
// Builds a Thai dictionary break engine for word and line breaks.
// Stores adoptDictionary in fDictionary (deleted in the destructor) and sets
// up the character sets used by the word-break algorithm.
// status receives any error from parsing the set patterns.
ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
: DictionaryBreakEngine((1<<UBRK_WORD) | (1<<UBRK_LINE)),
fDictionary(adoptDictionary)
{
// Characters this engine handles: Thai script with line-break class SA.
fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status);
if (U_SUCCESS(status)) {
setCharacters(fThaiWordSet);
}
// Marks within that set, plus U+0020 SPACE.
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status);
fMarkSet.add(0x0020);
// End-of-word set: all handled characters except the ones removed below.
fEndWordSet = fThaiWordSet;
fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
// Start-of-word set.
fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
// Suffix characters: PAIYANNOI and MAIYAMOK.
fSuffixSet.add(THAI_PAIYANNOI);
fSuffixSet.add(THAI_MAIYAMOK);
// Compact for caching.
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
fSuffixSet.compact();
}
// Deletes the dictionary matcher that was handed to the constructor.
ThaiBreakEngine::~ThaiBreakEngine() {
delete fDictionary;
}
/**
* Divide a range of Thai dictionary characters into words.
*
* Starting at rangeStart, the loop asks the dictionary for candidate words at
* the current position, looks ahead over up to THAI_LOOKAHEAD words to choose
* among several candidates, and then applies heuristics for text that is not in
* the dictionary, for combining marks, and for the suffix characters PAIYANNOI
* and MAIYAMOK. The end position of every word found is pushed onto foundBreaks.
*
* @param text A UText representing the text; its iteration position is changed
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Stack onto which the break positions found are pushed
* @return The number of words counted; a word whose break is at or beyond
* rangeEnd is popped again and not counted. Returns 0 without touching the
* text when the range is shorter than THAI_MIN_WORD_SPAN.
*/
int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
if ((rangeEnd - rangeStart) < THAI_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
uint32_t wordsFound = 0;
int32_t wordLength;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
// Lookahead state, used circularly: slot (wordsFound + k) % THAI_LOOKAHEAD
// holds the k-th word ahead of the one currently being decided.
// NOTE(review): PossibleWord is defined outside this view; the descriptions of
// candidates(), acceptMarked(), markCurrent(), backUp() and longestPrefix()
// below are taken from the surrounding comments -- confirm against its definition.
PossibleWord words[THAI_LOOKAHEAD];
UChar32 uc;
utext_setNativeIndex(text, rangeStart);
// Each iteration handles one word starting at 'current'; the loop also stops
// once a push onto foundBreaks has set a failure status.
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
wordLength = 0;
// Look for candidate words at the current position
int candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// If we found exactly one, use that
if (candidates == 1) {
wordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// If there was more than one, see which one can take us forward the most words
else if (candidates > 1) {
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
do {
// NOTE(review): wordsMatched is re-initialized to 1 on every pass, so the
// (wordsMatched < 2) test below is always true.
int wordsMatched = 1;
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
// See if any of the possible second words is followed by a third word
do {
// If we find a third word, stop right away
if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
words[wordsFound % THAI_LOOKAHEAD].markCurrent();
goto foundBest;
}
}
while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text));
}
}
while (words[wordsFound % THAI_LOOKAHEAD].backUp(text));
// Reached either by one of the gotos above or by exhausting the candidates
// for the first word.
foundBest:
wordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// We come here after having either found a word or not. We look ahead to the
// next word. If it's not a dictionary word, we will combine it with the word we
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& (wordLength == 0
|| words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
// Look for a plausible word boundary
//TODO: This section will need a rework for UText.
// 'remaining' counts down the characters left in the range; 'chars' counts
// the characters skipped so far; 'pc' and 'uc' are the characters on either
// side of the boundary being tested.
int32_t remaining = rangeEnd - (current+wordLength);
UChar32 pc = utext_current32(text);
int32_t chars = 0;
for (;;) {
utext_next32(text);
uc = utext_current32(text);
// TODO: Here we're counting on the fact that the SA languages are all
// in the BMP. This should get fixed with the UText rework.
chars += 1;
if (--remaining <= 0) {
break;
}
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
// NOTE: In the original Apple code, checked that the next
// two characters after uc were not 0x0E4C THANTHAKHAT before
// checking the dictionary. That is just a performance filter,
// but it's not clear it's faster than checking the trie.
int candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// Put the iterator back on the boundary being tested after the dictionary probe.
utext_setNativeIndex(text, current + wordLength + chars);
if (candidates > 0) {
break;
}
}
pc = uc;
}
// Bump the word count if there wasn't already one
if (wordLength <= 0) {
wordsFound += 1;
}
// Update the length with the passed-over characters
wordLength += chars;
}
else {
// Back up to where we were for next iteration
utext_setNativeIndex(text, current+wordLength);
}
}
// Never stop before a combining mark.
// (fMarkSet also contains U+0020, so a following space is absorbed here too.)
int32_t currPos;
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
wordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
// resynch continues to function. For example, one of the suffix characters
// could be a typo in the middle of a word.
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& fSuffixSet.contains(uc = utext_current32(text))) {
if (uc == THAI_PAIYANNOI) {
// Attach PAIYANNOI only if the preceding character is not itself a suffix character.
if (!fSuffixSet.contains(utext_previous32(text))) {
// Skip over previous end and PAIYANNOI
utext_next32(text);
utext_next32(text);
wordLength += 1; // Add PAIYANNOI to word
uc = utext_current32(text); // Fetch next character
}
else {
// Restore prior position
utext_next32(text);
}
}
if (uc == THAI_MAIYAMOK) {
// Attach MAIYAMOK only if it does not directly follow another MAIYAMOK.
if (utext_previous32(text) != THAI_MAIYAMOK) {
// Skip over previous end and MAIYAMOK
utext_next32(text);
utext_next32(text);
wordLength += 1; // Add MAIYAMOK to word
}
else {
// Restore prior position
utext_next32(text);
}
}
}
else {
// A dictionary word follows, or the next character is not a suffix:
// reposition at the end of the current word.
utext_setNativeIndex(text, current+wordLength);
}
}
// Did we find a word on this iteration? If so, push it on the break stack
if (wordLength > 0) {
foundBreaks.push((current+wordLength), status);
}
}
// Don't return a break for the end of the dictionary range if there is one there.
// NOTE(review): peeki() is called even when this call pushed nothing; presumably
// any earlier entries are below rangeEnd, otherwise the unsigned wordsFound
// would wrap on the decrement -- confirm against UStack and the caller.
if (foundBreaks.peeki() >= rangeEnd) {
(void) foundBreaks.popi();
wordsFound -= 1;
}
return wordsFound;
}
// How many words in a row are "good enough"? Also the number of lookahead
// slots kept while dividing a range.
#define KHMER_LOOKAHEAD 3
// A non-word is only combined with a preceding dictionary word whose length
// is below this threshold
#define KHMER_ROOT_COMBINE_THRESHOLD 3
// A non-word is only combined with a preceding word if the prefix it shares
// with a dictionary word is shorter than this threshold
#define KHMER_PREFIX_COMBINE_THRESHOLD 3
// Minimum word size
#define KHMER_MIN_WORD 2
// Minimum number of characters for two words; shorter ranges are not divided
#define KHMER_MIN_WORD_SPAN (KHMER_MIN_WORD * 2)
/**
 * Build a Khmer break engine for word and line breaking.
 *
 * @param adoptDictionary Dictionary matcher; ownership passes to the engine.
 * @param status          ICU error code, updated by the pattern parsing below.
 */
KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
        : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
          fDictionary(adoptDictionary)
{
    // Characters this engine is responsible for.
    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
    if (U_SUCCESS(status)) {
        setCharacters(fKhmerWordSet);
    }

    // Combining marks, plus SPACE; a break is never placed in front of these.
    // Each set is compacted once it is complete, for caching.
    fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
    fMarkSet.add(0x0020).compact();

    // Characters that may end a word: every Khmer word character except
    // KHMER SIGN COENG, which combines some following characters.
    // Further exclusions are disabled in this version:
    //   fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
    //   fEndWordSet.remove(0x17B2);         // Khmer independent vowel that can't end a word
    //   fEndWordSet.remove(0x17B6, 0x17C5); // Remove dependent vowels
    fEndWordSet = fKhmerWordSet;
    fEndWordSet.remove(0x17D2).compact();

    // Characters that may begin a word.
    // Disabled in this version:
    //   fBeginWordSet.add(0x17A3, 0x17A4);  // deprecated vowels
    fBeginWordSet.add(0x1780, 0x17B3).compact();

    // Unlike the Thai engine, no suffix set is built here.
}
/** Destructor: releases the dictionary matcher adopted by the constructor. */
KhmerBreakEngine::~KhmerBreakEngine()
{
    delete fDictionary;
}
/**
* Divide a range of Khmer dictionary characters into words.
*
* Starting at rangeStart, the loop asks the dictionary for candidate words at
* the current position, looks ahead over up to KHMER_LOOKAHEAD words to choose
* among several candidates, and then applies heuristics for text that is not in
* the dictionary and for combining marks. The end position of every word found
* is pushed onto foundBreaks.
*
* @param text A UText representing the text; its iteration position is changed
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Stack onto which the break positions found are pushed
* @return The number of words counted; a word whose break is at or beyond
* rangeEnd is popped again and not counted. Returns 0 without touching the
* text when the range is shorter than KHMER_MIN_WORD_SPAN.
*/
int32_t
KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
uint32_t wordsFound = 0;
int32_t wordLength;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
// Lookahead state, used circularly: slot (wordsFound + k) % KHMER_LOOKAHEAD
// holds the k-th word ahead of the one currently being decided.
// NOTE(review): PossibleWord is defined outside this view; the descriptions of
// candidates(), acceptMarked(), markCurrent(), backUp() and longestPrefix()
// below are taken from the surrounding comments -- confirm against its definition.
PossibleWord words[KHMER_LOOKAHEAD];
UChar32 uc;
utext_setNativeIndex(text, rangeStart);
// Each iteration handles one word starting at 'current'; the loop also stops
// once a push onto foundBreaks has set a failure status.
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
wordLength = 0;
// Look for candidate words at the current position
int candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// If we found exactly one, use that
if (candidates == 1) {
wordLength = words[wordsFound%KHMER_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// If there was more than one, see which one can take us forward the most words
else if (candidates > 1) {
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
do {
// NOTE(review): wordsMatched is re-initialized to 1 on every pass, so the
// (wordsMatched < 2) test below is always true.
int wordsMatched = 1;
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
goto foundBest;
}
// See if any of the possible second words is followed by a third word
do {
// If we find a third word, stop right away
if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
goto foundBest;
}
}
while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
}
}
while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
// Reached either by one of the gotos above or by exhausting the candidates
// for the first word.
foundBest:
wordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
wordsFound += 1;
}
// We come here after having either found a word or not. We look ahead to the
// next word. If it's not a dictionary word, we will combine it with the word we
// just found (if there is one), but only if the preceding word does not exceed
// the threshold.
// The text iterator should now be positioned at the end of the word we found.
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
// if it is a dictionary word, do nothing. If it isn't, then if there is
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
&& (wordLength == 0
|| words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
// Look for a plausible word boundary
//TODO: This section will need a rework for UText.
// 'remaining' counts down the characters left in the range; 'chars' counts
// the characters skipped so far; 'pc' and 'uc' are the characters on either
// side of the boundary being tested.
int32_t remaining = rangeEnd - (current+wordLength);
UChar32 pc = utext_current32(text);
int32_t chars = 0;
for (;;) {
utext_next32(text);
uc = utext_current32(text);
// TODO: Here we're counting on the fact that the SA languages are all
// in the BMP. This should get fixed with the UText rework.
chars += 1;
if (--remaining <= 0) {
break;
}
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
// Maybe. See if it's in the dictionary.
int candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
// Put the iterator back on the boundary being tested after the dictionary probe.
utext_setNativeIndex(text, current+wordLength+chars);
if (candidates > 0) {
break;
}
}
pc = uc;
}
// Bump the word count if there wasn't already one
if (wordLength <= 0) {
wordsFound += 1;
}
// Update the length with the passed-over characters
wordLength += chars;
}
else {
// Back up to where we were for next iteration
utext_setNativeIndex(text, current+wordLength);
}
}
// Never stop before a combining mark.
// (fMarkSet also contains U+0020, so a following space is absorbed here too.)
int32_t currPos;
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
utext_next32(text);
wordLength += (int32_t)utext_getNativeIndex(text) - currPos;
}
// NOTE: the suffix handling below is commented out in this engine; the
// KHMER_PAIYANNOI / KHMER_MAIYAMOK names it mentions are not defined in the
// visible part of this file.
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
// resynch continues to function. For example, one of the suffix characters
// could be a typo in the middle of a word.
// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
// && fSuffixSet.contains(uc = utext_current32(text))) {
// if (uc == KHMER_PAIYANNOI) {
// if (!fSuffixSet.contains(utext_previous32(text))) {
// // Skip over previous end and PAIYANNOI
// utext_next32(text);
// utext_next32(text);
// wordLength += 1; // Add PAIYANNOI to word
// uc = utext_current32(text); // Fetch next character
// }
// else {
// // Restore prior position
// utext_next32(text);
// }
// }
// if (uc == KHMER_MAIYAMOK) {
// if (utext_previous32(text) != KHMER_MAIYAMOK) {
// // Skip over previous end and MAIYAMOK
// utext_next32(text);
// utext_next32(text);
// wordLength += 1; // Add MAIYAMOK to word
// }
// else {
// // Restore prior position
// utext_next32(text);
// }
// }
// }
// else {
// utext_setNativeIndex(text, current+wordLength);
// }
// }
// Did we find a word on this iteration? If so, push it on the break stack
if (wordLength > 0) {
foundBreaks.push((current+wordLength), status);
}
}
// Don't return a break for the end of the dictionary range if there is one there.
// NOTE(review): peeki() is called even when this call pushed nothing; presumably
// any earlier entries are below rangeEnd, otherwise the unsigned wordsFound
// would wrap on the decrement -- confirm against UStack and the caller.
if (foundBreaks.peeki() >= rangeEnd) {
(void) foundBreaks.popi();
wordsFound -= 1;
}
return wordsFound;
}
#if !UCONFIG_NO_NORMALIZATION
/*
******************************************************************
* CjkBreakEngine
*/
// Largest uint32_t value. CjkBreakEngine::divideUpDictionaryRange stores it in
// its bestSnlp table to mark a position for which no segmentation has been
// found yet.
static const uint32_t kuint32max = 0xFFFFFFFF;
/**
 * Build a CJK break engine for word breaking.
 *
 * @param adoptDictionary Dictionary matcher; ownership passes to the engine.
 * @param type            kKorean to handle Hangul syllables only, otherwise
 *                        Han, Katakana and Hiragana are handled.
 * @param status          ICU error code, updated by the pattern parsing below.
 *                        On failure no character set is registered.
 */
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
        : DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
    // Korean dictionary only includes Hangul syllables
    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
    if (U_FAILURE(status)) {
        return;
    }

    // Korean and Japanese/Chinese are handled using different dictionaries.
    if (type == kKorean) {
        setCharacters(fHangulWordSet);
        return;
    }

    // Chinese and Japanese
    UnicodeSet cjSet;
    cjSet.addAll(fHanWordSet);
    cjSet.addAll(fKatakanaWordSet);
    cjSet.addAll(fHiraganaWordSet);
    // Add the two prolonged sound marks as individual code points.
    // The escapes in a UNICODE_STRING_SIMPLE literal are only interpreted by the
    // pattern parser; handing such a literal to UnicodeSet::add(UnicodeString)
    // would add a single multi-character string that still contains the
    // backslashes, so neither mark would end up in the set.
    cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
    cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
    setCharacters(cjSet);
}
/** Destructor: releases the dictionary matcher adopted by the constructor. */
CjkBreakEngine::~CjkBreakEngine()
{
    delete fDictionary;
}
// The katakanaCost values below are based on the length frequencies of all
// katakana phrases in the dictionary

// Longest katakana run that has its own entry in the cost table.
static const int kMaxKatakanaLength = 8;
// Katakana runs of this length or longer are not proposed as a single word.
static const int kMaxKatakanaGroupLength = 20;
// Cost assigned to a single character that has no one-character dictionary match.
static const uint32_t maxSnlp = 255;

/**
 * Return the default cost of treating a run of katakana characters as one word.
 *
 * @param wordLength Length of the run, in characters.
 * @return The table entry for lengths 0..kMaxKatakanaLength; 8192 for any
 *         length outside that range (longer runs, and negative values, which
 *         would otherwise index before the start of the table).
 */
static inline uint32_t getKatakanaCost(int wordLength){
    //TODO: fill array with actual values from dictionary!
    static const uint32_t katakanaCost[kMaxKatakanaLength + 1]
                                       = {8192, 984, 408, 240, 204, 252, 300, 372, 480};
    if (wordLength < 0 || wordLength > kMaxKatakanaLength) {
        return 8192;
    }
    return katakanaCost[wordLength];
}
/**
 * Test whether a code point is treated as katakana by the CJK heuristics.
 *
 * The parameter is a full 32-bit code point (the same width as the UChar32
 * values callers obtain from utext_current32). With a 16-bit parameter a
 * supplementary code point such as U+230A1 would be truncated to U+30A1 and
 * wrongly reported as katakana.
 *
 * @param value Code point to test; negative values (e.g. a sentinel) yield false.
 * @return true for U+30A1..U+30FE except U+30FB, and for U+FF66..U+FF9F.
 */
static inline bool isKatakana(int32_t value) {
    return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
            (value >= 0xFF66 && value <= 0xFF9F);
}
// A very simple helper class to streamline the buffer handling in
// divideUpDictionaryRange.
//
// Holds room for N elements inline and switches to a heap allocation obtained
// from uprv_malloc when more are requested. Elements are never constructed,
// copied or destroyed by this class, so it is only suitable for plain data
// types.
//
// NOTE(review): the result of uprv_malloc is not checked; if an allocation
// fails, elems() returns NULL while the requested capacity is still recorded.
template<class T, size_t N>
class AutoBuffer {
public:
    // Create a buffer with room for at least 'size' elements.
    // explicit: a bare size must not silently convert into a buffer.
    explicit AutoBuffer(size_t size) : buffer(stackBuffer), capacity(N) {
        if (size > N) {
            buffer = reinterpret_cast<T*>(uprv_malloc(sizeof(T)*size));
            capacity = size;
        }
    }
    // Release the heap allocation, if one was made.
    ~AutoBuffer() {
        if (buffer != stackBuffer)
            uprv_free(buffer);
    }
    // Pointer to the first element of the current storage.
    T* elems() {
        return buffer;
    }
    // Unchecked element access.
    const T& operator[] (size_t i) const {
        return buffer[i];
    }
    T& operator[] (size_t i) {
        return buffer[i];
    }
    // resize without copy: the previous contents are lost whenever the buffer
    // has to grow; a request that fits the current capacity is a no-op.
    void resize(size_t size) {
        if (size <= capacity)
            return;
        if (buffer != stackBuffer)
            uprv_free(buffer);
        buffer = reinterpret_cast<T*>(uprv_malloc(sizeof(T)*size));
        capacity = size;
    }
private:
    T stackBuffer[N];
    T* buffer;
    AutoBuffer();
    size_t capacity;
    // Not copyable: a member-wise copy would either alias the other object's
    // inline storage or make two objects free the same heap block.
    // Declared but intentionally not defined.
    AutoBuffer(const AutoBuffer &other);
    AutoBuffer &operator=(const AutoBuffer &other);
};
/*
* Divide a range of CJK dictionary characters into words.
*
* The range is extracted and NFKC-normalized if necessary; a dynamic-programming
* pass over the normalized text then picks the segmentation with the lowest
* total cost, using the values reported by the dictionary, a default cost for
* unmatched single characters, and a length-based cost for runs of katakana.
* The resulting boundaries are mapped back to positions in the original text
* and pushed onto foundBreaks.
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Stack onto which the break positions found are pushed
* @return The number of breaks pushed, or 0 if the range is empty or an ICU
* call reports an error
*/
int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
if (rangeStart >= rangeEnd) {
return 0;
}
const size_t defaultInputLength = 80;
size_t inputLength = rangeEnd - rangeStart;
// TODO: Replace by UnicodeString.
AutoBuffer<UChar, defaultInputLength> charString(inputLength);
// Normalize the input string and put it in normalizedText.
// The map from the indices of the normalized input to the raw
// input is kept in charPositions.
UErrorCode status = U_ZERO_ERROR;
// NOTE(review): the length returned by utext_extract is ignored; the code
// assumes the range yields exactly inputLength UChars -- confirm for UText
// providers whose native indexes are not UTF-16 offsets.
utext_extract(text, rangeStart, rangeEnd, charString.elems(), inputLength, &status);
if (U_FAILURE(status)) {
return 0;
}
UnicodeString inputString(charString.elems(), inputLength);
// TODO: Use Normalizer2.
UNormalizationMode norm_mode = UNORM_NFKC;
UBool isNormalized =
Normalizer::quickCheck(inputString, norm_mode, status) == UNORM_YES ||
Normalizer::isNormalized(inputString, norm_mode, status);
// TODO: Replace by UVector32.
// charPositions[k] is the offset in inputString at which the k-th character
// of the text being segmented starts; numChars counts those characters.
AutoBuffer<int32_t, defaultInputLength> charPositions(inputLength + 1);
int numChars = 0;
UText normalizedText = UTEXT_INITIALIZER;
// Needs to be declared here because normalizedText holds onto its buffer.
UnicodeString normalizedString;
if (isNormalized) {
// Already normalized: segment inputString itself, one entry per code point.
int32_t index = 0;
charPositions[0] = 0;
while(index < inputString.length()) {
index = inputString.moveIndex32(index, 1);
charPositions[++numChars] = index;
}
utext_openUnicodeString(&normalizedText, &inputString, &status);
}
else {
Normalizer::normalize(inputString, norm_mode, 0, normalizedString, status);
if (U_FAILURE(status)) {
return 0;
}
// resize() does not preserve contents; nothing has been stored yet.
charPositions.resize(normalizedString.length() + 1);
// Step through the normalization of the raw input and record, after each
// step, the index the normalizer reports via getIndex().
Normalizer normalizer(charString.elems(), inputLength, norm_mode);
int32_t index = 0;
charPositions[0] = 0;
while(index < normalizer.endIndex()){
/* UChar32 uc = */ normalizer.next();
charPositions[++numChars] = index = normalizer.getIndex();
}
utext_openUnicodeString(&normalizedText, &normalizedString, &status);
}
// NOTE(review): this return skips the utext_close(&normalizedText) done at the
// end of the function -- presumably harmless when opening failed; confirm.
if (U_FAILURE(status)) {
return 0;
}
// From this point on, all the indices refer to the indices of
// the normalized input string.
// bestSnlp[i] is the snlp of the best segmentation of the first i
// characters in the range to be matched.
// kuint32max marks a position that has not been reached.
// TODO: Replace by UVector32.
AutoBuffer<uint32_t, defaultInputLength> bestSnlp(numChars + 1);
bestSnlp[0] = 0;
for(int i = 1; i <= numChars; i++) {
bestSnlp[i] = kuint32max;
}
// prev[i] is the index of the last CJK character in the previous word in
// the best segmentation of the first i characters.
// TODO: Replace by UVector32.
AutoBuffer<int, defaultInputLength> prev(numChars + 1);
for(int i = 0; i <= numChars; i++){
prev[i] = -1;
}
const size_t maxWordSize = 20;
// TODO: Replace both with UVector32.
// Per-position dictionary results: values[j] is the cost and lengths[j] the
// length of the j-th match.
AutoBuffer<int32_t, maxWordSize> values(numChars);
AutoBuffer<int32_t, maxWordSize> lengths(numChars);
// Dynamic programming to find the best segmentation.
bool is_prev_katakana = false;
for (int32_t i = 0; i < numChars; ++i) {
//utext_setNativeIndex(text, rangeStart + i);
// NOTE(review): i counts characters, but is used here as a native index into
// normalizedText; the two presumably coincide only while every character
// occupies a single UChar -- confirm for supplementary characters.
utext_setNativeIndex(&normalizedText, i);
// Skip positions that no segmentation has reached.
if (bestSnlp[i] == kuint32max)
continue;
int32_t count;
// limit maximum word length matched to size of current substring
int32_t maxSearchLength = (i + maxWordSize < (size_t) numChars)? maxWordSize : (numChars - i);
fDictionary->matches(&normalizedText, maxSearchLength, lengths.elems(), count, maxSearchLength, values.elems());
// if there are no single character matches found in the dictionary
// starting with this character, treat character as a 1-character word
// with the highest value possible, i.e. the least likely to occur.
// Exclude Korean characters from this treatment, as they should be left
// together by default.
// NOTE(review): utext_current32 below reads at the position where matches()
// left the iterator, not at i -- confirm this is the intended character.
if((count == 0 || lengths[0] != 1) &&
!fHangulWordSet.contains(utext_current32(&normalizedText))) {
values[count] = maxSnlp;
lengths[count++] = 1;
}
// Relax: extend the best segmentation ending at i by each candidate word.
for (int j = 0; j < count; j++) {
uint32_t newSnlp = bestSnlp[i] + values[j];
if (newSnlp < bestSnlp[lengths[j] + i]) {
bestSnlp[lengths[j] + i] = newSnlp;
prev[lengths[j] + i] = i;
}
}
// In Japanese,
// Katakana word in single character is pretty rare. So we apply
// the following heuristic to Katakana: any continuous run of Katakana
// characters is considered a candidate word with a default cost
// specified in the katakanaCost table according to its length.
//utext_setNativeIndex(text, rangeStart + i);
utext_setNativeIndex(&normalizedText, i);
bool is_katakana = isKatakana(utext_current32(&normalizedText));
// Only the first character of a katakana run proposes the run as a word.
if (!is_prev_katakana && is_katakana) {
int j = i + 1;
utext_next32(&normalizedText);
// Find the end of the continuous run of Katakana characters
while (j < numChars && (j - i) < kMaxKatakanaGroupLength &&
isKatakana(utext_current32(&normalizedText))) {
utext_next32(&normalizedText);
++j;
}
// Runs that reached kMaxKatakanaGroupLength are not proposed.
if ((j - i) < kMaxKatakanaGroupLength) {
uint32_t newSnlp = bestSnlp[i] + getKatakanaCost(j - i);
if (newSnlp < bestSnlp[j]) {
bestSnlp[j] = newSnlp;
prev[j] = i;
}
}
}
is_prev_katakana = is_katakana;
}
// Start pushing the optimal offset index into t_boundary (t for tentative).
// prev[numChars] is guaranteed to be meaningful.
// We'll first push in the reverse order, i.e.,
// t_boundary[0] = numChars, and afterwards do a swap.
// TODO: Replace by UVector32.
AutoBuffer<int, maxWordSize> t_boundary(numChars + 1);
int numBreaks = 0;
// No segmentation found, set boundary to end of range
if (bestSnlp[numChars] == kuint32max) {
t_boundary[numBreaks++] = numChars;
} else {
// Follow the prev[] links back from the end of the range to its start.
for (int i = numChars; i > 0; i = prev[i]) {
t_boundary[numBreaks++] = i;
}
U_ASSERT(prev[t_boundary[numBreaks - 1]] == 0);
}
// Reverse offset index in t_boundary.
// Don't add a break for the start of the dictionary range if there is one
// there already.
if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) {
t_boundary[numBreaks++] = 0;
}
// Now that we're done, convert positions in t_boundary[] (indices in
// the normalized input string) back to indices in the raw input string
// while reversing t_boundary and pushing values to foundBreaks.
for (int i = numBreaks-1; i >= 0; i--) {
foundBreaks.push(charPositions[t_boundary[i]] + rangeStart, status);
}
utext_close(&normalizedText);
return numBreaks;
}
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

Просмотреть файл

@ -0,0 +1,314 @@
/**
*******************************************************************************
* Copyright (C) 2006,2012, International Business Machines Corporation *
* and others. All Rights Reserved. *
*******************************************************************************
*/
#ifndef DICTBE_H
#define DICTBE_H
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/utext.h"
#include "brkeng.h"
U_NAMESPACE_BEGIN
class DictionaryMatcher;
/*******************************************************************
* DictionaryBreakEngine
*/
/**
* <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
* dictionary to determine language-specific breaks.</p>
*
* <p>Subclasses supply the actual segmentation by implementing
* divideUpDictionaryRange().</p>
*
* <p>After it is constructed a DictionaryBreakEngine may be shared between
* threads without synchronization.</p>
*/
class DictionaryBreakEngine : public LanguageBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fSet;
/**
* The set of break types handled by this engine, as a bitmap
* @internal
*/
uint32_t fTypes;
/**
* <p>Default constructor. Declared private, so it is not available to
* subclasses or clients.</p>
*
*/
DictionaryBreakEngine();
public:
/**
* <p>Constructor setting the break types handled.</p>
*
* @param breakTypes A bitmap of types handled by the engine.
*/
DictionaryBreakEngine( uint32_t breakTypes );
/**
* <p>Virtual destructor.</p>
*/
virtual ~DictionaryBreakEngine();
/**
* <p>Indicate whether this engine handles a particular character for
* a particular kind of break.</p>
*
* @param c A character which begins a run that the engine might handle
* @param breakType The type of text break which the caller wants to determine
* @return TRUE if this engine handles the particular character and break
* type.
*/
virtual UBool handles( UChar32 c, int32_t breakType ) const;
/**
* <p>Find any breaks within a run in the supplied text.</p>
*
* @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A UStack that receives the breaks found, if any
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
protected:
/**
* <p>Set the character set handled by this engine.</p>
*
* @param set A UnicodeSet of the set of characters handled by the engine
*/
virtual void setCharacters( const UnicodeSet &set );
/**
* <p>Set the break types handled by this engine.</p>
* (This setter is currently commented out; break types are supplied to the
* constructor instead.)
*
* @param breakTypes A bitmap of types handled by the engine.
*/
// virtual void setBreakTypes( uint32_t breakTypes );
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
* <p>Pure virtual: every concrete engine must implement it.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack that receives the break positions found
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const = 0;
};
/*******************************************************************
* ThaiBreakEngine
*/
/**
* <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
* dictionary and heuristics to determine Thai-specific breaks.</p>
*
* <p>After it is constructed a ThaiBreakEngine may be shared between
* threads without synchronization.</p>
*/
class ThaiBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fThaiWordSet;
// Characters that may end a word
UnicodeSet fEndWordSet;
// Characters that may begin a word
UnicodeSet fBeginWordSet;
// Suffix characters that may be attached to the preceding word
UnicodeSet fSuffixSet;
// Characters that a break must not be placed in front of
UnicodeSet fMarkSet;
// Dictionary used for matching; owned by the engine
DictionaryMatcher *fDictionary;
public:
/**
* <p>Constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
* @param status ICU error code, passed by reference.
*/
ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~ThaiBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack that receives the break positions found
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
#if !UCONFIG_NO_NORMALIZATION
/*******************************************************************
* CjkBreakEngine
*/
// Indicates the language/script that a CjkBreakEngine will handle.
// It is passed to the CjkBreakEngine constructor.
enum LanguageType {
kKorean,
kChineseJapanese
};
/**
* <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
* dictionary with costs associated with each word and
* Viterbi decoding to determine CJK-specific breaks.</p>
*/
class CjkBreakEngine : public DictionaryBreakEngine {
protected:
/**
* The sets of characters handled by this engine, one per script
* @internal
*/
UnicodeSet fHangulWordSet;
UnicodeSet fHanWordSet;
UnicodeSet fKatakanaWordSet;
UnicodeSet fHiraganaWordSet;
// Dictionary used for matching; owned by the engine
DictionaryMatcher *fDictionary;
public:
/**
* <p>Constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted. The DictionaryMatcher must contain costs for each word
* in order for the dictionary to work properly.
* @param type The language/script group (Korean, or Chinese and Japanese)
* that this engine instance handles.
* @param status ICU error code, passed by reference.
*/
CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~CjkBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters handled by this break engine.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack that receives the break positions found
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
#endif
/*******************************************************************
* KhmerBreakEngine
*/
/**
* <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
* DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
*
* <p>After it is constructed a KhmerBreakEngine may be shared between
* threads without synchronization.</p>
*/
class KhmerBreakEngine : public DictionaryBreakEngine {
private:
/**
* The set of characters handled by this engine
* @internal
*/
UnicodeSet fKhmerWordSet;
// Characters that may end a word
UnicodeSet fEndWordSet;
// Characters that may begin a word
UnicodeSet fBeginWordSet;
// Characters that a break must not be placed in front of
UnicodeSet fMarkSet;
// Dictionary used for matching; owned by the engine
DictionaryMatcher *fDictionary;
public:
/**
* <p>Constructor.</p>
*
* @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
* engine is deleted.
* @param status ICU error code, passed by reference.
*/
KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
/**
* <p>Virtual destructor.</p>
*/
virtual ~KhmerBreakEngine();
protected:
/**
* <p>Divide up a range of known dictionary characters.</p>
*
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks A UStack that receives the break positions found
* @return The number of breaks found
*/
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
};
U_NAMESPACE_END
/* DICTBE_H */
#endif

Просмотреть файл

@ -0,0 +1,228 @@
/*
*******************************************************************************
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* dictionarydata.cpp
*
* created on: 2012may31
* created by: Markus W. Scherer & Maxime Serrano
*/
#include "dictionarydata.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "unicode/udata.h"
#include "cmemory.h"
#if !UCONFIG_NO_BREAK_ITERATION
U_NAMESPACE_BEGIN
// Out-of-class definitions of the static constant members of DictionaryData;
// no initializers are given here.
#ifndef CYGWINMSVC /* On Cygwin/MSVC these definitions cause a symbol redefinition error. */
const int32_t DictionaryData::TRIE_TYPE_BYTES;
const int32_t DictionaryData::TRIE_TYPE_UCHARS;
#endif
/** Out-of-line destructor of the matcher base class; it has nothing to release. */
DictionaryMatcher::~DictionaryMatcher() {}
/** Destructor: closes the data file handle held in 'file'. */
UCharsDictionaryMatcher::~UCharsDictionaryMatcher()
{
    udata_close(file);
}
/** @return DictionaryData::TRIE_TYPE_UCHARS, identifying the trie type of this matcher. */
int32_t UCharsDictionaryMatcher::getType() const
{
    return DictionaryData::TRIE_TYPE_UCHARS;
}
/**
 * Find the dictionary words that start at the current position of the text.
 *
 * Characters are read from the text with utext_next32 and fed to a UCharsTrie
 * over 'characters'. Every prefix that the trie reports as having a value is
 * recorded, as long as there is room.
 *
 * @param text      Text to match; its iterator is advanced past every
 *                  character examined.
 * @param maxLength Maximum number of characters to examine.
 * @param lengths   Output array; receives the length, in characters, of each
 *                  match.
 * @param count     Output; the number of matches stored. It is set on every
 *                  return path, including when the text is already exhausted.
 * @param limit     Maximum number of matches to store in lengths/values.
 * @param values    Output array for the trie value of each match, or NULL if
 *                  the values are not wanted.
 * @return The number of characters examined; 0 if no character could be read.
 */
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
    // Initialize the out-parameter before the first return; callers pass in an
    // uninitialized variable and read it unconditionally afterwards.
    count = 0;

    UCharsTrie uct(characters);
    UChar32 c = utext_next32(text);
    if (c < 0) {
        return 0;
    }
    UStringTrieResult result = uct.first(c);
    int32_t numChars = 1;
    for (;;) {
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (count < limit) {
                if (values != NULL) {
                    values[count] = uct.getValue();
                }
                lengths[count++] = numChars;
            }
            // A final value means no longer word can continue from here.
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        }
        else if (result == USTRINGTRIE_NO_MATCH) {
            break;
        }

        // TODO: why do we have a text limit if the UText knows its length?
        if (numChars >= maxLength) {
            break;
        }

        c = utext_next32(text);
        if (c < 0) {
            break;
        }
        ++numChars;
        result = uct.next(c);
    }
    return numChars;
}
/** Destructor: closes the data file handle held in 'file'. */
BytesDictionaryMatcher::~BytesDictionaryMatcher()
{
    udata_close(file);
}
/**
 * Map a code point to the value that is fed to the bytes trie.
 *
 * When the transform type stored in transformConstant is the offset type,
 * U+200D maps to 0xFF, U+200C maps to 0xFE, and any other code point maps to
 * its distance from the stored offset, provided that distance lies in
 * 0..0xFD; otherwise U_SENTINEL is returned. For any other transform type the
 * code point is returned unchanged.
 *
 * @param c Code point to transform.
 * @return The transformed value, or U_SENTINEL if c cannot be represented.
 */
UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
    const UBool usesOffset =
        (transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET;
    if (!usesOffset) {
        return c;
    }

    // The two joiner characters have fixed slots at the top of the byte range.
    switch (c) {
    case 0x200D:
        return 0xFF;
    case 0x200C:
        return 0xFE;
    default:
        break;
    }

    const int32_t base = transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK;
    const int32_t delta = c - base;
    if (0 <= delta && delta <= 0xFD) {
        return (UChar32)delta;
    }
    return U_SENTINEL;
}
/** @return DictionaryData::TRIE_TYPE_BYTES, identifying the trie type of this matcher. */
int32_t BytesDictionaryMatcher::getType() const
{
    return DictionaryData::TRIE_TYPE_BYTES;
}
/**
 * Find the dictionary words that start at the current position of the text.
 *
 * Characters are read from the text with utext_next32, mapped through
 * transform(), and fed to a BytesTrie over 'characters'. Every prefix that the
 * trie reports as having a value is recorded, as long as there is room.
 *
 * @param text      Text to match; its iterator is advanced past every
 *                  character examined.
 * @param maxLength Maximum number of characters to examine.
 * @param lengths   Output array; receives the length, in characters, of each
 *                  match.
 * @param count     Output; the number of matches stored. It is set on every
 *                  return path, including when the text is already exhausted.
 * @param limit     Maximum number of matches to store in lengths/values.
 * @param values    Output array for the trie value of each match, or NULL if
 *                  the values are not wanted.
 * @return The number of characters examined; 0 if no character could be read.
 */
int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
    // Initialize the out-parameter before the first return; callers pass in an
    // uninitialized variable and read it unconditionally afterwards.
    count = 0;

    BytesTrie bt(characters);
    UChar32 c = utext_next32(text);
    if (c < 0) {
        return 0;
    }
    UStringTrieResult result = bt.first(transform(c));
    int32_t numChars = 1;
    for (;;) {
        if (USTRINGTRIE_HAS_VALUE(result)) {
            if (count < limit) {
                if (values != NULL) {
                    values[count] = bt.getValue();
                }
                lengths[count++] = numChars;
            }
            // A final value means no longer word can continue from here.
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
        }
        else if (result == USTRINGTRIE_NO_MATCH) {
            break;
        }

        // TODO: why do we have a text limit if the UText knows its length?
        if (numChars >= maxLength) {
            break;
        }

        c = utext_next32(text);
        if (c < 0) {
            break;
        }
        ++numChars;
        result = bt.next(transform(c));
    }
    return numChars;
}
U_NAMESPACE_END
U_NAMESPACE_USE
/**
 * Data swapper for dictionary (.dict) data.
 *
 * Swaps the generic header via udata_swapDataHeader(), verifies the data format
 * bytes and major format version, reads the indexes[] array, and, when
 * length >= 0, copies the data to outData and swaps the indexes and (for a
 * UChars trie) the 16-bit trie units. When length < 0 nothing is written and
 * only the size is computed.
 *
 * @return headerSize + indexes[IX_TOTAL_SIZE], or 0 after setting *pErrorCode.
 */
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
void *outData, UErrorCode *pErrorCode) {
const UDataInfo *pInfo;
int32_t headerSize;
const uint8_t *inBytes;
uint8_t *outBytes;
const int32_t *inIndexes;
int32_t indexes[DictionaryData::IX_COUNT];
int32_t i, offset, size;
headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
// The UDataInfo is read 4 bytes into the input data.
pInfo = (const UDataInfo *)((const char *)inData + 4);
// dataFormat must be the ASCII bytes "Dict" (0x44 0x69 0x63 0x74), format version 1.
if (!(pInfo->dataFormat[0] == 0x44 &&
pInfo->dataFormat[1] == 0x69 &&
pInfo->dataFormat[2] == 0x63 &&
pInfo->dataFormat[3] == 0x74 &&
pInfo->formatVersion[0] == 1)) {
udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
*pErrorCode = U_UNSUPPORTED_ERROR;
return 0;
}
inBytes = (const uint8_t *)inData + headerSize;
outBytes = (uint8_t *)outData + headerSize;
inIndexes = (const int32_t *)inBytes;
// With a known length, make sure that at least the whole indexes[] array is present.
if (length >= 0) {
length -= headerSize;
if (length < (int32_t)(sizeof(indexes))) {
udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
// Read the indexes through the swapper so that they can be used as numbers here.
for (i = 0; i < DictionaryData::IX_COUNT; i++) {
indexes[i] = udata_readInt32(ds, inIndexes[i]);
}
size = indexes[DictionaryData::IX_TOTAL_SIZE];
if (length >= 0) {
if (length < size) {
udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
*pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
// Copy everything first; the parts that need swapping are overwritten below.
if (inBytes != outBytes) {
uprv_memcpy(outBytes, inBytes, size);
}
offset = 0;
ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
offset = (int32_t)sizeof(indexes);
int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
// The trie occupies the bytes from the end of indexes[] up to IX_RESERVED1_OFFSET.
if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
} else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
// nothing to do
} else {
udata_printError(ds, "udict_swap(): unknown trie type!\n");
*pErrorCode = U_UNSUPPORTED_ERROR;
return 0;
}
// these next two sections are empty in the current format,
// but may be used later.
// The assignments below only step offset/nextOffset through those sections;
// nothing reads the values afterwards.
offset = nextOffset;
nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
offset = nextOffset;
nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
offset = nextOffset;
}
return headerSize + size;
}
#endif

Просмотреть файл

@ -0,0 +1,165 @@
/*
*******************************************************************************
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* dictionarydata.h
*
* created on: 2012may31
* created by: Markus W. Scherer & Maxime Serrano
*/
#ifndef __DICTIONARYDATA_H__
#define __DICTIONARYDATA_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/utext.h"
#include "unicode/udata.h"
#include "udataswp.h"
#include "unicode/uobject.h"
#include "unicode/ustringtrie.h"
U_NAMESPACE_BEGIN
class UCharsTrie;
class BytesTrie;
// Constants that describe the layout of dictionary data:
// trie type bits, transform bits, and the positions within indexes[].
class U_COMMON_API DictionaryData : public UMemory {
public:
// Trie type values; extracted from indexes[IX_TRIE_TYPE] with TRIE_TYPE_MASK.
static const int32_t TRIE_TYPE_BYTES = 0;
static const int32_t TRIE_TYPE_UCHARS = 1;
static const int32_t TRIE_TYPE_MASK = 7;
// Flag bit combined with the trie type (see the IX_TRIE_TYPE comment below).
static const int32_t TRIE_HAS_VALUES = 8;
// Transform specification: the type is selected with TRANSFORM_TYPE_MASK,
// the offset code point with TRANSFORM_OFFSET_MASK.
static const int32_t TRANSFORM_NONE = 0;
static const int32_t TRANSFORM_TYPE_OFFSET = 0x1000000;
static const int32_t TRANSFORM_TYPE_MASK = 0x7f000000;
static const int32_t TRANSFORM_OFFSET_MASK = 0x1fffff;
enum {
// Byte offsets from the start of the data, after the generic header.
IX_STRING_TRIE_OFFSET,
IX_RESERVED1_OFFSET,
IX_RESERVED2_OFFSET,
IX_TOTAL_SIZE,
// Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc.
IX_TRIE_TYPE,
// Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc.
IX_TRANSFORM,
IX_RESERVED6,
IX_RESERVED7,
// Number of entries in indexes[].
IX_COUNT
};
};
/**
* Wrapper class around generic dictionaries, implementing matches().
* getType() should return a TRIE_TYPE_??? constant from DictionaryData.
*
* All implementations of this interface must be thread-safe if they are to be used inside of the
* dictionary-based break iteration code.
*/
class U_COMMON_API DictionaryMatcher : public UMemory {
public:
virtual ~DictionaryMatcher();
// this should emulate CompactTrieDictionary::matches()
// Parameters, as used by the UCharsDictionaryMatcher and BytesDictionaryMatcher implementations:
//   text      - text to read code points from
//   maxLength - maximum number of code points to consume
//   lengths   - output array receiving the length of each match
//   count     - output: number of matches stored
//   limit     - capacity of lengths (and of values, if given)
//   values    - optional output array receiving the value of each match
// Those implementations return the number of code points consumed.
virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
int32_t limit, int32_t *values = NULL) const = 0;
/** @return DictionaryData::TRIE_TYPE_XYZ */
virtual int32_t getType() const = 0;
};
// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary
class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher {
public:
// constructs a new UCharsDictionaryMatcher.
// c is stored as-is and later used to construct a UCharsTrie in matches().
// The UDataMemory * will be closed on this object's destruction.
UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
virtual ~UCharsDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
int32_t limit, int32_t *values = NULL) const;
// Returns DictionaryData::TRIE_TYPE_UCHARS.
virtual int32_t getType() const;
private:
// Serialized trie units passed to the UCharsTrie constructor.
const UChar *characters;
// Data memory handle passed to the constructor.
UDataMemory *file;
};
// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary
class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher {
public:
// constructs a new BytesDictionaryMatcher
// the transform constant should be the constant read from the file, not a masked version!
// the UDataMemory * fed in here will be closed on this object's destruction
BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
: characters(c), transformConstant(t), file(f) { }
virtual ~BytesDictionaryMatcher();
virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
int32_t limit, int32_t *values = NULL) const;
// Returns DictionaryData::TRIE_TYPE_BYTES.
virtual int32_t getType() const;
private:
// Maps a code point to the value fed to the BytesTrie, or U_SENTINEL if it cannot be mapped.
UChar32 transform(UChar32 c) const;
// Serialized trie bytes passed to the BytesTrie constructor.
const char *characters;
// Unmasked transform specification (type bits and offset bits).
int32_t transformConstant;
// Data memory handle; closed by the destructor.
UDataMemory *file;
};
U_NAMESPACE_END
// Data swapper function for dictionary (.dict) data.
U_CAPI int32_t U_EXPORT2
udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode);
/**
* Format of dictionary .dict data files.
* Format version 1.0.
*
* A dictionary .dict data file contains a byte-serialized BytesTrie or
* a UChars-serialized UCharsTrie.
* Such files are used in dictionary-based break iteration (DBBI).
*
* For a BytesTrie, a transformation type is specified for
* transforming Unicode strings into byte sequences.
*
* A .dict file begins with a standard ICU data file header
* (DataHeader, see ucmndata.h and unicode/udata.h).
* The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0).
*
* After the header, the file contains the following parts.
* Constants are defined in the DictionaryData class.
*
* For the data structure of BytesTrie & UCharsTrie see
* http://site.icu-project.org/design/struct/tries
* and the bytestrie.h and ucharstrie.h header files.
*
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;
*
* The first four indexes are byte offsets in ascending order.
* Each byte offset marks the start of the next part in the data file,
* and the end of the previous one.
* When two consecutive byte offsets are the same, then the corresponding part is empty.
* Byte offsets are offsets from after the header,
* that is, from the beginning of the indexes[].
* Each part starts at an offset with proper alignment for its data.
* If necessary, the previous part may include padding bytes to achieve this alignment.
*
* trieType=indexes[IX_TRIE_TYPE] defines the trie type.
* transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation.
* If the transformation type is TRANSFORM_TYPE_OFFSET,
* then the lower 21 bits contain the offset code point.
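* Each code point c is mapped to byte b = (c - offset).
* Code points outside the range offset..(offset+0xff) cannot be mapped
* and do not occur in the dictionary.
* Note: BytesDictionaryMatcher::transform() maps U+200D to byte 0xff and
* U+200C to byte 0xfe, and maps other code points only for (c - offset) in 0..0xfd.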
*
* stringTrie; -- a serialized BytesTrie or UCharsTrie
*
* The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType),
* or it maps all strings to 0 (TRIE_HAS_VALUES bit not set).
*/
#endif /* !UCONFIG_NO_BREAK_ITERATION */
#endif /* __DICTIONARYDATA_H__ */

Просмотреть файл

@ -0,0 +1,61 @@
/*******************************************************************************
* Copyright (C) 2008, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File DTINTRV.CPP
*
*******************************************************************************
*/
#include "unicode/dtintrv.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
//DateInterval::DateInterval(){}
// Constructs an interval from the two given dates; no ordering check is done here.
DateInterval::DateInterval(UDate from, UDate to)
: fromDate(from),
toDate(to)
{}
// Nothing to release.
DateInterval::~DateInterval(){}
// Copy constructor: copies the base object, then delegates to the
// assignment operator for the two dates.
DateInterval::DateInterval(const DateInterval& other)
    : UObject(other)
{
    operator=(other);
}
// Assignment: copies both dates from other; self-assignment is a no-op.
DateInterval&
DateInterval::operator=(const DateInterval& other) {
    if ( this == &other ) {
        return *this;
    }
    toDate = other.toDate;
    fromDate = other.fromDate;
    return *this;
}
// Returns a newly allocated copy of this interval (the result of operator new).
DateInterval*
DateInterval::clone() const {
    DateInterval *copy = new DateInterval(*this);
    return copy;
}
// Two intervals are equal when both their from-dates and their to-dates are equal.
UBool
DateInterval::operator==(const DateInterval& other) const {
    const bool sameStart = (fromDate == other.fromDate);
    const bool sameEnd = (toDate == other.toDate);
    return sameStart && sameEnd;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,40 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: errorcode.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009mar10
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "unicode/errorcode.h"
U_NAMESPACE_BEGIN
// Empty destructor body; it performs no check of the stored error code.
ErrorCode::~ErrorCode() {}
// Resets the stored error code to U_ZERO_ERROR and returns the value it held before.
UErrorCode ErrorCode::reset() {
    const UErrorCode previous = errorCode;
    errorCode = U_ZERO_ERROR;
    return previous;
}
// Calls handleFailure() if isFailure() reports a failure; otherwise does nothing.
void ErrorCode::assertSuccess() const {
    if(!isFailure()) {
        return;
    }
    handleFailure();
}
// Returns u_errorName() of the currently stored error code.
const char* ErrorCode::errorName() const {
return u_errorName(errorCode);
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,288 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: filterednormalizer2.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009dec10
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cpputils.h"
U_NAMESPACE_BEGIN
// Empty destructor body.
FilteredNormalizer2::~FilteredNormalizer2() {}
// Public entry point: validates the arguments, empties dest, and then
// normalizes src into dest via the internal span-based overload.
// On a prior/argument failure dest is set to bogus; if dest and src are the
// same object, U_ILLEGAL_ARGUMENT_ERROR is set and dest is left untouched.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(src, errorCode);
    if(U_FAILURE(errorCode)) {
        dest.setToBogus();
    } else if(&dest==&src) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    } else {
        dest.remove();
        normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
    }
    return dest;
}
// Internal: No argument checking, and appends to dest.
// Pass as input spanCondition the one that is likely to yield a non-zero
// span length at the start of src.
// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
// USET_SPAN_SIMPLE should be passed in for the start of src
// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
// an in-filter prefix.
//
// The loop alternates between in-filter spans, which are normalized with
// norm2 and appended, and out-of-filter spans, which are appended verbatim.
UnicodeString &
FilteredNormalizer2::normalize(const UnicodeString &src,
                               UnicodeString &dest,
                               USetSpanCondition spanCondition,
                               UErrorCode &errorCode) const {
    UnicodeString scratch;  // Don't throw away destination buffer between iterations.
    int32_t pos=0;
    while(pos<src.length()) {
        const int32_t end=set.span(src, pos, spanCondition);
        const int32_t len=end-pos;
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            if(len!=0) {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                dest.append(norm2.normalize(src.tempSubStringBetween(pos, end),
                                            scratch, errorCode));
                if(U_FAILURE(errorCode)) {
                    break;
                }
            }
            spanCondition=USET_SPAN_NOT_CONTAINED;
        } else {
            if(len!=0) {
                dest.append(src, pos, len);
            }
            spanCondition=USET_SPAN_SIMPLE;
        }
        pos=end;
    }
    return dest;
}
// Public overload: forwards to the internal overload with doNormalize=TRUE.
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, TRUE, errorCode);
}
// Forwards to the internal normalizeSecondAndAppend() overload with doNormalize=FALSE.
UnicodeString &
FilteredNormalizer2::append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
return normalizeSecondAndAppend(first, second, FALSE, errorCode);
}
// Internal worker shared by normalizeSecondAndAppend() and append().
// doNormalize selects between norm2.normalizeSecondAndAppend() (TRUE) and
// norm2.append() (FALSE) for the in-filter part around the seam, and between
// filtered normalization and plain appending for the rest of second.
// Returns first. Sets U_ILLEGAL_ARGUMENT_ERROR if first and second are the same object.
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UBool doNormalize,
UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(first, errorCode);
uprv_checkCanGetBuffer(second, errorCode);
if(U_FAILURE(errorCode)) {
return first;
}
if(&first==&second) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return first;
}
// With an empty first string there is no seam to merge across.
if(first.isEmpty()) {
if(doNormalize) {
return normalize(second, first, errorCode);
} else {
return first=second;
}
}
// merge the in-filter suffix of the first string with the in-filter prefix of the second
int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
if(prefixLimit!=0) {
UnicodeString prefix(second.tempSubString(0, prefixLimit));
int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
if(suffixStart==0) {
// All of first is in the filter set: operate on first directly.
if(doNormalize) {
norm2.normalizeSecondAndAppend(first, prefix, errorCode);
} else {
norm2.append(first, prefix, errorCode);
}
} else {
// Only the in-filter tail of first is handed to norm2, then written back.
UnicodeString middle(first, suffixStart, INT32_MAX);
if(doNormalize) {
norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
} else {
norm2.append(middle, prefix, errorCode);
}
first.replace(suffixStart, INT32_MAX, middle);
}
}
// Whatever follows the in-filter prefix of second starts with an out-of-filter span.
if(prefixLimit<second.length()) {
UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
if(doNormalize) {
normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
} else {
first.append(rest);
}
}
return first;
}
// Delegates to norm2 only if c is in the filter set; otherwise returns false.
UBool
FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
return set.contains(c) && norm2.getDecomposition(c, decomposition);
}
// Delegates to norm2 only if c is in the filter set; otherwise returns false.
UBool
FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
}
// Composes a and b via norm2 only if both are in the filter set;
// otherwise returns U_SENTINEL.
UChar32
FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
    if(!set.contains(a) || !set.contains(b)) {
        return U_SENTINEL;
    }
    return norm2.composePair(a, b);
}
// Returns norm2's combining class for c if c is in the filter set, else 0.
uint8_t
FilteredNormalizer2::getCombiningClass(UChar32 c) const {
    if(!set.contains(c)) {
        return 0;
    }
    return norm2.getCombiningClass(c);
}
// Returns TRUE if every in-filter span of s is normalized according to norm2.
// Out-of-filter spans are skipped. Returns FALSE on any failure.
UBool
FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    USetSpanCondition mode=USET_SPAN_SIMPLE;
    int32_t pos=0;
    while(pos<s.length()) {
        const int32_t end=set.span(s, pos, mode);
        if(mode!=USET_SPAN_NOT_CONTAINED) {
            // In-filter span: ask the wrapped normalizer.
            const UBool ok=norm2.isNormalized(s.tempSubStringBetween(pos, end), errorCode);
            if(U_FAILURE(errorCode) || !ok) {
                return FALSE;
            }
            mode=USET_SPAN_NOT_CONTAINED;
        } else {
            mode=USET_SPAN_SIMPLE;
        }
        pos=end;
    }
    return TRUE;
}
// Quick-checks every in-filter span of s with norm2.
// Returns immediately with the span's result on failure or UNORM_NO;
// otherwise returns UNORM_MAYBE if any span said so, else UNORM_YES.
// Returns UNORM_MAYBE if errorCode already indicates a failure.
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return UNORM_MAYBE;
    }
    UNormalizationCheckResult verdict=UNORM_YES;
    USetSpanCondition mode=USET_SPAN_SIMPLE;
    int32_t pos=0;
    while(pos<s.length()) {
        const int32_t end=set.span(s, pos, mode);
        if(mode!=USET_SPAN_NOT_CONTAINED) {
            const UNormalizationCheckResult part=
                norm2.quickCheck(s.tempSubStringBetween(pos, end), errorCode);
            if(U_FAILURE(errorCode) || part==UNORM_NO) {
                return part;
            }
            if(part==UNORM_MAYBE) {
                verdict=part;
            }
            mode=USET_SPAN_NOT_CONTAINED;
        } else {
            mode=USET_SPAN_SIMPLE;
        }
        pos=end;
    }
    return verdict;
}
// Returns the end index of the initial part of s for which every in-filter
// span passes norm2.spanQuickCheckYes() completely; out-of-filter spans
// always count as passing. Returns 0 if errorCode already indicates a failure.
int32_t
FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
    uprv_checkCanGetBuffer(s, errorCode);
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    USetSpanCondition mode=USET_SPAN_SIMPLE;
    int32_t pos=0;
    while(pos<s.length()) {
        const int32_t end=set.span(s, pos, mode);
        if(mode!=USET_SPAN_NOT_CONTAINED) {
            // Translate the span-relative result back into an index into s.
            const int32_t yesEnd=
                pos+norm2.spanQuickCheckYes(s.tempSubStringBetween(pos, end), errorCode);
            if(U_FAILURE(errorCode) || yesEnd<end) {
                return yesEnd;
            }
            mode=USET_SPAN_NOT_CONTAINED;
        } else {
            mode=USET_SPAN_SIMPLE;
        }
        pos=end;
    }
    return s.length();
}
// True for every code point outside the filter set; otherwise defers to norm2.
UBool
FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
return !set.contains(c) || norm2.hasBoundaryBefore(c);
}
// True for every code point outside the filter set; otherwise defers to norm2.
UBool
FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
return !set.contains(c) || norm2.hasBoundaryAfter(c);
}
// True for every code point outside the filter set; otherwise defers to norm2.
UBool
FilteredNormalizer2::isInert(UChar32 c) const {
return !set.contains(c) || norm2.isInert(c);
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/**
 * C API: creates a FilteredNormalizer2 that wraps norm2 and the given filter set.
 *
 * @param norm2      Normalizer to wrap; must not be NULL.
 * @param filterSet  Filter set; must not be NULL.
 * @param pErrorCode In/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR if
 *                   norm2 or filterSet is NULL, or to U_MEMORY_ALLOCATION_ERROR
 *                   if the object cannot be allocated.
 * @return The new normalizer, or NULL on failure.
 */
U_CAPI UNormalizer2 * U_EXPORT2
unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    // Both pointers are dereferenced into references below, so reject NULL
    // for either of them rather than only for the filter set.
    if(norm2==NULL || filterSet==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
                                             *UnicodeSet::fromUSet(filterSet));
    if(fn2==NULL) {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return (UNormalizer2 *)fn2;
}
#endif // !UCONFIG_NO_NORMALIZATION

Просмотреть файл

@ -0,0 +1,208 @@
/*
******************************************************************************
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
* 03/28/00 aliu Creation.
******************************************************************************
*/
#ifndef HASH_H
#define HASH_H
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "cmemory.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
/**
* Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void*
* hashtable implemented in C. Hashtable is designed to be idiomatic and
* easy-to-use in C++.
*
* Hashtable is an INTERNAL CLASS.
*/
class U_COMMON_API Hashtable : public UMemory {
// Points at hashObj once init() has succeeded; stays 0 otherwise.
UHashtable* hash;
// Embedded table storage that init() initializes with uhash_init().
UHashtable hashObj;
// Shared constructor helper: initializes hashObj and installs the key deleter.
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
public:
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
* @param valueComp Comparator for comparing the values
* @param status Error code
*/
Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
/**
* Construct a hashtable
* @param status Error code
*/
Hashtable(UErrorCode& status);
/**
* Construct a hashtable, _disregarding any error_. Use this constructor
* with caution.
*/
Hashtable();
/**
* Non-virtual destructor; make this virtual if Hashtable is subclassed
* in the future.
*/
~Hashtable();
// Forwards to uhash_setValueDeleter() and returns its result.
UObjectDeleter *setValueDeleter(UObjectDeleter *fn);
// Forwards to uhash_count().
int32_t count() const;
// put()/puti() store a heap-allocated copy of key (new UnicodeString(key)).
void* put(const UnicodeString& key, void* value, UErrorCode& status);
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
// Lookups and removals pass the address of key to the uhash_ functions.
void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
void removeAll(void);
const UHashElement* find(const UnicodeString& key) const;
// Forwards to uhash_nextElement(); pos is passed by address.
const UHashElement* nextElement(int32_t& pos) const;
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
UValueComparator* setValueComparator(UValueComparator* valueComp);
// Forwards to uhash_equals() on the two underlying tables.
UBool equals(const Hashtable& that) const;
private:
Hashtable(const Hashtable &other); // forbid copying of this class
Hashtable &operator=(const Hashtable &other); // forbid copying of this class
};
/*********************************************************************
* Implementation
********************************************************************/
// Initializes the embedded hashObj. On success, hash is pointed at it and
// uprv_deleteUObject is installed as the key deleter. Does nothing if status
// already indicates a failure; hash is left unchanged if uhash_init() fails.
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
                            UValueComparator *valueComp, UErrorCode& status) {
    if (!U_FAILURE(status)) {
        uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
        if (U_SUCCESS(status)) {
            hash = &hashObj;
            uhash_setKeyDeleter(hash, uprv_deleteUObject);
        }
    }
}
// Uses uhash_hashUnicodeString for hashing together with the caller's comparators.
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status);
}
// Selects the caseless or the regular UnicodeString hash/compare functions
// depending on ignoreKeyCase; no value comparator is installed.
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
    : hash(0)
{
    UHashFunction *keyHash = uhash_hashUnicodeString;
    UKeyComparator *keyComp = uhash_compareUnicodeString;
    if (ignoreKeyCase) {
        keyHash = uhash_hashCaselessUnicodeString;
        keyComp = uhash_compareCaselessUnicodeString;
    }
    init(keyHash, keyComp, NULL, status);
}
// UnicodeString keys with the regular hash/compare functions; no value comparator.
inline Hashtable::Hashtable(UErrorCode& status)
: hash(0)
{
init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
// Same setup as Hashtable(UErrorCode&), but the status is a local that is discarded.
// If init() fails, hash remains 0.
inline Hashtable::Hashtable()
: hash(0)
{
UErrorCode status = U_ZERO_ERROR;
init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status);
}
// Closes the underlying table, but only if init() succeeded (hash is non-NULL).
inline Hashtable::~Hashtable() {
    if (hash == NULL) {
        return;
    }
    uhash_close(hash);
}
// Forwards to uhash_setValueDeleter() and returns its result.
inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) {
return uhash_setValueDeleter(hash, fn);
}
// Forwards to uhash_count().
inline int32_t Hashtable::count() const {
return uhash_count(hash);
}
// Stores value under a newly allocated copy of key; returns what uhash_put() returns.
inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) {
return uhash_put(hash, new UnicodeString(key), value, &status);
}
// Integer-valued variant of put(): stores value under a newly allocated copy of key.
inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) {
return uhash_puti(hash, new UnicodeString(key), value, &status);
}
// Forwards to uhash_get() with the address of key.
inline void* Hashtable::get(const UnicodeString& key) const {
return uhash_get(hash, &key);
}
// Forwards to uhash_geti() with the address of key.
inline int32_t Hashtable::geti(const UnicodeString& key) const {
return uhash_geti(hash, &key);
}
// Forwards to uhash_remove() with the address of key and returns its result.
inline void* Hashtable::remove(const UnicodeString& key) {
return uhash_remove(hash, &key);
}
// Forwards to uhash_removei() with the address of key and returns its result.
inline int32_t Hashtable::removei(const UnicodeString& key) {
return uhash_removei(hash, &key);
}
// Forwards to uhash_find() with the address of key.
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
return uhash_find(hash, &key);
}
// Forwards to uhash_nextElement(); pos is passed by address so the callee can update it.
inline const UHashElement* Hashtable::nextElement(int32_t& pos) const {
return uhash_nextElement(hash, &pos);
}
// Forwards to uhash_removeAll().
inline void Hashtable::removeAll(void) {
uhash_removeAll(hash);
}
// Forwards to uhash_setKeyComparator() and returns its result.
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
return uhash_setKeyComparator(hash, keyComp);
}
// Forwards to uhash_setValueComparator() and returns its result.
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
return uhash_setValueComparator(hash, valueComp);
}
// Compares the two underlying tables with uhash_equals().
inline UBool Hashtable::equals(const Hashtable& that)const{
return uhash_equals(hash, that.hash);
}
U_NAMESPACE_END
#endif

Просмотреть файл

@ -0,0 +1,29 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/icudataver.h"
#include "unicode/ures.h"
#include "uresimp.h" /* for ures_getVersionByKey */
/*
 * Fills dataVersionFillin with the version stored under U_ICU_DATA_KEY in the
 * U_ICU_VERSION_BUNDLE resource bundle.
 * Does nothing if *status already indicates a failure or if dataVersionFillin is NULL.
 */
U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) {
    UResourceBundle *bundle = NULL;

    if (U_FAILURE(*status) || dataVersionFillin == NULL) {
        return;
    }

    bundle = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status);
    if (U_SUCCESS(*status)) {
        ures_getVersionByKey(bundle, U_ICU_DATA_KEY, dataVersionFillin, status);
    }
    /* Closed unconditionally, also when opening failed. */
    ures_close(bundle);
}

Просмотреть файл

@ -0,0 +1,843 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : icuplug.c
*
* Date Name Description
* 10/29/2009 sl New.
******************************************************************************
*/
#include "unicode/icuplug.h"
#include "icuplugimp.h"
#include "cstring.h"
#include "cmemory.h"
#include "putilimp.h"
#include "ucln.h"
#include <stdio.h>
#ifdef __MVS__ /* defined by z/OS compiler */
#define _POSIX_SOURCE
#include <cics.h> /* 12 Nov 2011 JAM iscics() function */
#endif
#ifndef UPLUG_TRACE
#define UPLUG_TRACE 0
#endif
#if UPLUG_TRACE
#include <stdio.h>
#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x
#endif
/**
* Internal structure of an ICU plugin.
* Entries live in the static pluginList array; the string members are
* fixed-size buffers of UPLUG_NAME_MAX bytes.
*/
struct UPlugData {
UPlugEntrypoint *entrypoint; /**< plugin entrypoint */
uint32_t structSize; /**< initialized to the size of this structure */
uint32_t token; /**< must be U_PLUG_TOKEN */
void *lib; /**< plugin library, or NULL */
char libName[UPLUG_NAME_MAX]; /**< library name */
char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or an empty string */
char config[UPLUG_NAME_MAX]; /**< configuration data */
void *context; /**< user context data */
char name[UPLUG_NAME_MAX]; /**< name of plugin */
UPlugLevel level; /**< level of plugin */
UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */
UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */
UErrorCode pluginStatus; /**< status code of plugin */
};
#define UPLUG_LIBRARY_INITIAL_COUNT 8
#define UPLUG_PLUGIN_INITIAL_COUNT 12
/**
 * Remove an item from a densely packed array by shifting all following
 * entries down by one slot.
 * @param list the full list
 * @param listSize the number of entries in the list
 * @param memberSize the size of one member, in bytes
 * @param itemToRemove the item number (0-based index) of the member
 * @return the new listsize; unchanged if the list is empty or
 *         itemToRemove is not a valid index
 */
static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) {
    uint8_t *bytePtr = (uint8_t *)list;
    int32_t trailing;

    /* get rid of some bad cases first */
    if(listSize<1) {
        return listSize;
    }
    if(itemToRemove<0 || itemToRemove>=listSize) {
        return listSize;
    }

    /* is there anything to move? */
    trailing = listSize-(itemToRemove+1);
    if(trailing > 0) {
        /* Move ALL entries after the removed one, not just the next one. */
        memmove(bytePtr+(itemToRemove*memberSize),
                bytePtr+((itemToRemove+1)*memberSize),
                (size_t)trailing*(size_t)memberSize);
    }

    return listSize-1;
}
#if U_ENABLE_DYLOAD
/**
* Library management. Internal.
* @internal
*/
struct UPlugLibrary;
/**
* Library management. Internal.
* One entry of the library list: an open library handle, the name it was
* opened under, and the number of outstanding opens.
* @internal
*/
typedef struct UPlugLibrary {
void *lib; /**< library ptr */
char name[UPLUG_NAME_MAX]; /**< library name */
uint32_t ref; /**< reference count */
} UPlugLibrary;
static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT];
static UPlugLibrary * libraryList = staticLibraryList;
static int32_t libraryCount = 0;
static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT;
/**
 * Search for a library by name. Doesn't lock
 * @param libName libname to search for
 * @return the index of the library in libraryList, or -1 if not found
 */
static int32_t searchForLibraryName(const char *libName) {
    int32_t idx = 0;
    while(idx < libraryCount) {
        if(uprv_strcmp(libName, libraryList[idx].name) == 0) {
            return idx;
        }
        ++idx;
    }
    return -1;
}
/**
 * Search for a library by handle.
 * @param lib library handle to search for
 * @return the index of the library in libraryList, or -1 if not found
 */
static int32_t searchForLibrary(void *lib) {
    int32_t idx = 0;
    while(idx < libraryCount) {
        if(libraryList[idx].lib == lib) {
            return idx;
        }
        ++idx;
    }
    return -1;
}
/**
 * Look up the name under which a library handle was opened.
 * @param lib library handle
 * @param status error code; set to U_MISSING_RESOURCE_ERROR if lib is not in the list
 * @return pointer to the stored name, or NULL on failure
 */
U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status) {
    int32_t idx;
    if(U_FAILURE(*status)) {
        return NULL;
    }
    idx = searchForLibrary(lib);
    if(idx == -1) {
        *status = U_MISSING_RESOURCE_ERROR;
        return NULL;
    }
    return libraryList[idx].name;
}
/**
 * Open a library by name, or bump the reference count if a library with that
 * name is already in the list.
 * @param libName name of the library
 * @param status error code; set to U_MEMORY_ALLOCATION_ERROR when the list is full
 * @return the library handle, or NULL on failure
 */
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status) {
    int32_t libEntry = -1;
    void *lib = NULL;

    if(U_FAILURE(*status)) return NULL;

    libEntry = searchForLibraryName(libName);
    if(libEntry == -1) {
        /* Check capacity BEFORE claiming a slot, so that a full list leaves
           libraryCount untouched and every slot of the list is usable. */
        if(libraryCount >= libraryMax) {
            /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */
            *status = U_MEMORY_ALLOCATION_ERROR;
#if UPLUG_TRACE
            DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax));
#endif
            return NULL;
        }
        libEntry = libraryCount;
        /* Some operating systems don't want
           DL operations from multiple threads. */
        libraryList[libEntry].lib = uprv_dl_open(libName, status);
#if UPLUG_TRACE
        DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, libraryList[libEntry].lib));
#endif

        if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) {
            /* cleanup. */
            libraryList[libEntry].lib = NULL; /* failure with open */
            libraryList[libEntry].name[0] = 0;
#if UPLUG_TRACE
            DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, libraryList[libEntry].lib));
#endif
            /* no need to free - the slot was never counted. */
        } else { /* is it still there? */
            /* link it in */
            uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX);
            /* strncpy does not terminate when libName fills the whole buffer */
            libraryList[libEntry].name[UPLUG_NAME_MAX-1] = 0;
            libraryList[libEntry].ref=1;
            lib = libraryList[libEntry].lib;
            libraryCount++;
        }
    } else {
        lib = libraryList[libEntry].lib;
        libraryList[libEntry].ref++;
    }
    return lib;
}
/**
 * Drop one reference to a library handle. When the count reaches zero the
 * library is closed and its entry is removed from the list.
 * @param lib library handle
 * @param status error code; set to U_INTERNAL_PROGRAM_ERROR if lib is not in the list
 */
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status) {
    int32_t idx;

#if UPLUG_TRACE
    DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList));
#endif
    if(U_FAILURE(*status)) return;

    idx = searchForLibrary(lib);
    if(idx == -1) {
        *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */
        return;
    }

    if(--(libraryList[idx].ref) == 0) {
        uprv_dl_close(libraryList[idx].lib, status);
        libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), idx);
    }
}
#endif
static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT];
static int32_t pluginCount = 0;
/**
 * Convert a plugin pointer into its index within pluginList.
 * Pointers at or before the start of the list map to 0, pointers at or past
 * the end of the used part map to pluginCount.
 */
static int32_t uplug_pluginNumber(UPlugData* d) {
    UPlugData *pastPlug = &pluginList[pluginCount];
    if(d<=pluginList) {
        return 0;
    } else if(d>=pastPlug) {
        return pluginCount;
    } else {
        /* Subtracting two UPlugData pointers already yields an element count;
           dividing by sizeof again would always give a wrong (too small) index. */
        return (int32_t)(d-pluginList);
    }
}
/**
 * Iterate over the registered plugins.
 * @param prior NULL to get the first plugin, otherwise the value returned
 *              by a previous call
 * @return the next plugin, or NULL if there are no (more) plugins
 */
U_CAPI UPlugData * U_EXPORT2
uplug_nextPlug(UPlugData *prior) {
    UPlugData *pastPlug = &pluginList[pluginCount];
    /* For prior==NULL the candidate is the first slot; the bounds check below
       then also returns NULL when no plugin is registered at all, instead of
       handing out an unused slot. */
    UPlugData *nextPlug = (prior==NULL) ? pluginList : &prior[1];
    if(nextPlug>=pastPlug) {
        return NULL;
    }
    return nextPlug;
}
/**
 * Invoke the plugin's entrypoint with the given reason.
 * Does nothing if plug is NULL or *status already indicates a failure.
 * Sets U_INTERNAL_PROGRAM_ERROR if the entrypoint does not return UPLUG_TOKEN.
 */
static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) {
    if(plug!=NULL && !U_FAILURE(*status)) {
        UPlugTokenReturn answer = (*(plug->entrypoint))(plug, reason, status);
        if(answer!=UPLUG_TOKEN) {
            *status = U_INTERNAL_PROGRAM_ERROR;
        }
    }
}
/**
 * Send UPLUG_REASON_UNLOAD to a plugin.
 * A plugin that is still awaiting its load call is an internal error.
 * A plugin whose pluginStatus is a failure is skipped.
 */
static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
    if(plug->awaitingLoad) {
        /* shouldn't happen. Plugin hasn't been loaded yet. */
        *status = U_INTERNAL_PROGRAM_ERROR;
    } else if(U_SUCCESS(plug->pluginStatus)) {
        /* Don't unload a plug which has a failing load status - means it didn't actually load. */
        uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status);
    }
}
/**
 * Send UPLUG_REASON_QUERY to a freshly allocated plugin so that it can set
 * its level. The plugin must be awaiting load with level UPLUG_LEVEL_UNKNOWN,
 * otherwise *status is set to U_INTERNAL_PROGRAM_ERROR.
 * On a failed call, or if the plugin leaves its level unset, the outcome is
 * recorded in pluginStatus and the plugin is no longer awaiting load.
 */
static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
  if(!(plug->awaitingLoad && plug->level == UPLUG_LEVEL_UNKNOWN)) {
    /* shouldn't happen: a plug is queried exactly once, before loading */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  /* sentinel: lets us detect a plugin that never sets its level */
  plug->level = UPLUG_LEVEL_INVALID;
  uplug_callPlug(plug, UPLUG_REASON_QUERY, status);
  if(U_FAILURE(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
    plug->awaitingLoad = FALSE;
    return;
  }
  if(plug->level == UPLUG_LEVEL_INVALID) {
    plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
    plug->awaitingLoad = FALSE;
  }
}
/**
 * Send UPLUG_REASON_LOAD to a plugin that has been queried and is awaiting load.
 * Does nothing when plug is NULL or *status already indicates failure, so
 * callers may pass the result of a failed initialization directly.
 * @param plug plugin to load, may be NULL
 * @param status error code; set to U_INTERNAL_PROGRAM_ERROR if the plugin is
 *               not awaiting load or its level is below UPLUG_LEVEL_LOW
 */
static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
  if(plug==NULL || U_FAILURE(*status)) {
    /* nothing to load; also keeps the caller's original error code intact */
    return;
  }
  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen: already loaded, or level never set */
    *status = U_INTERNAL_PROGRAM_ERROR;
    return;
  }
  uplug_callPlug(plug, UPLUG_REASON_LOAD, status);
  plug->awaitingLoad = FALSE;
  if(!U_SUCCESS(*status)) {
    plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
  }
}
/**
 * Claim the next free slot of pluginList and reset it to the empty state.
 * @param status error code; set to U_MEMORY_ALLOCATION_ERROR when the table is full
 * @return the new plugin record, or NULL on failure
 */
static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status)
{
  UPlugData *plug = NULL;

  if(U_FAILURE(*status)) {
    return NULL;
  }

  if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) {
    *status = U_MEMORY_ALLOCATION_ERROR;
    return NULL;
  }

  plug = &pluginList[pluginCount++];

  plug->token = UPLUG_TOKEN;
  plug->structSize = sizeof(UPlugData);
  plug->name[0]=0;
  plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */
  plug->awaitingLoad = TRUE;
  plug->dontUnload = FALSE;
  plug->pluginStatus = U_ZERO_ERROR;
  plug->libName[0] = 0;
  plug->config[0]=0;
  plug->sym[0]=0;
  plug->lib=NULL;
  plug->entrypoint=NULL;
  /* slots are reused after removal; don't hand a stale context to the new plugin */
  plug->context=NULL;

  return plug;
}
/**
 * Allocate a plugin record, fill in its entrypoint, library, symbol name and
 * configuration, and send it the query call.
 * @param entrypoint plugin entrypoint function
 * @param config configuration string, or NULL
 * @param lib library handle, or NULL
 * @param symName entrypoint symbol name, or NULL
 * @param status error code
 * @return the new record, or NULL if no record could be allocated. When the
 *         query fails the record is still returned with *status set.
 */
static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName,
                                     UErrorCode *status) {
  UPlugData *plug;

  if(U_FAILURE(*status)) {
    return NULL;
  }

  plug = uplug_allocateEmptyPlug(status);
  if(plug==NULL || U_FAILURE(*status)) {
    /* table is full: there is no record to fill in */
    return NULL;
  }

  if(config!=NULL) {
    uprv_strncpy(plug->config, config, UPLUG_NAME_MAX);
  } else {
    plug->config[0] = 0;
  }

  if(symName!=NULL) {
    uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX);
  } else {
    plug->sym[0] = 0;
  }

  plug->entrypoint = entrypoint;
  plug->lib = lib;
  uplug_queryPlug(plug, status);

  return plug;
}
/**
 * Release a plugin record.
 * Unless the plugin is marked dontUnload, its library reference is released
 * first. If *status is (or becomes) a failure the record is kept in the table
 * as an inert "message" entry; otherwise it is removed from pluginList.
 */
static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) {
  UErrorCode closeStatus = U_ZERO_ERROR;
#if U_ENABLE_DYLOAD
  if(!plug->dontUnload) {
    uplug_closeLibrary(plug->lib, &closeStatus);
  }
#endif
  plug->lib = NULL;
  /* report a close failure only if nothing failed earlier */
  if(U_SUCCESS(*status) && U_FAILURE(closeStatus)) {
    *status = closeStatus;
  }
  if(U_FAILURE(*status)) {
    /* not ok- leave as a message. */
    plug->awaitingLoad=FALSE;
    plug->entrypoint=0;
    plug->dontUnload=TRUE;
    return;
  }
  /* all ok- shift the following plugins up and decrement the count. */
  pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug));
}
/**
 * Unload a plugin and then release its record. A NULL plugin is ignored.
 */
static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) {
  if(plugToRemove == NULL) {
    return;
  }
  uplug_unloadPlug(plugToRemove, status);
  uplug_deallocatePlug(plugToRemove, status);
}
/**
 * Unload and remove a plugin, but only if the pointer is found while walking
 * the plugin table; an unknown pointer is silently ignored.
 * @param plug plugin to remove
 * @param status error code
 */
U_CAPI void U_EXPORT2
uplug_removePlug(UPlugData *plug, UErrorCode *status)  {
  UPlugData *walker = pluginList;
  UPlugData *found = NULL;
  if(U_FAILURE(*status)) {
    return;
  }
  /* walk the table until the pointer is seen or the table is exhausted */
  while(walker!=NULL && found==NULL) {
    if(walker==plug) {
      found = plug;
    } else {
      walker = uplug_nextPlug(walker);
    }
  }
  uplug_doUnloadPlug(found, status);
}
/**
 * Set the plugin's dontUnload flag. While the flag is set, deallocating the
 * plugin does not release its library reference.
 * @param data plugin to modify
 * @param dontUnload new value of the flag
 */
U_CAPI void U_EXPORT2
uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload)
{
data->dontUnload = dontUnload;
}
/**
 * Set the plugin's level. The query step treats a plugin that leaves its
 * level unset as an error.
 * @param data plugin to modify
 * @param level new level
 */
U_CAPI void U_EXPORT2
uplug_setPlugLevel(UPlugData *data, UPlugLevel level) {
data->level = level;
}
/**
 * @param data plugin to inspect
 * @return the plugin's current level
 */
U_CAPI UPlugLevel U_EXPORT2
uplug_getPlugLevel(UPlugData *data) {
return data->level;
}
/**
 * Copy at most UPLUG_NAME_MAX characters of name into the plugin record.
 * NOTE(review): strncpy does not NUL-terminate when name has UPLUG_NAME_MAX
 * or more characters - confirm against the size of the name field.
 * @param data plugin to modify
 * @param name new name, must not be NULL
 */
U_CAPI void U_EXPORT2
uplug_setPlugName(UPlugData *data, const char *name) {
uprv_strncpy(data->name, name, UPLUG_NAME_MAX);
}
/**
 * @param data plugin to inspect
 * @return pointer to the name stored inside the plugin record (not a copy)
 */
U_CAPI const char * U_EXPORT2
uplug_getPlugName(UPlugData *data) {
return data->name;
}
/**
 * @param data plugin to inspect
 * @return pointer to the entrypoint symbol name stored inside the plugin
 *         record; an empty string if none was recorded
 */
U_CAPI const char * U_EXPORT2
uplug_getSymbolName(UPlugData *data) {
return data->sym;
}
/**
 * Get the name of the library a plugin came from.
 * Prefers the name stored in the record itself; otherwise, when dynamic
 * loading is enabled, looks the library handle up in the library list.
 * @param data plugin to inspect
 * @param status error code, passed on to the library lookup
 * @return the library name, or NULL when dynamic loading is disabled and no
 *         name is stored
 */
U_CAPI const char * U_EXPORT2
uplug_getLibraryName(UPlugData *data, UErrorCode *status) {
  if(data->libName[0] != 0) {
    return data->libName;
  }
#if U_ENABLE_DYLOAD
  return uplug_findLibrary(data->lib, status);
#else
  return NULL;
#endif
}
/**
 * @param data plugin to inspect
 * @return the library handle stored in the plugin record, may be NULL
 */
U_CAPI void * U_EXPORT2
uplug_getLibrary(UPlugData *data) {
return data->lib;
}
/**
 * @param data plugin to inspect
 * @return the context pointer last stored with uplug_setContext
 */
U_CAPI void * U_EXPORT2
uplug_getContext(UPlugData *data) {
return data->context;
}
/**
 * Store a caller-defined context pointer in the plugin record. The pointer
 * is stored as-is; this function neither copies nor frees what it points to.
 * @param data plugin to modify
 * @param context new context pointer
 */
U_CAPI void U_EXPORT2
uplug_setContext(UPlugData *data, void *context) {
data->context = context;
}
/**
 * @param data plugin to inspect
 * @return pointer to the configuration string stored inside the plugin
 *         record; an empty string if none was supplied
 */
U_CAPI const char* U_EXPORT2
uplug_getConfiguration(UPlugData *data) {
return data->config;
}
/**
 * Get the plugin record in slot n of the plugin table.
 * @param n zero-based slot index
 * @return the record, or NULL if n is outside [0, pluginCount)
 */
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n) {
  if(n >= 0 && n < pluginCount) {
    return &pluginList[n];
  }
  return NULL;
}
/**
 * @param plug plugin to inspect
 * @return the status recorded for the plugin by the query/load steps
 */
U_CAPI UErrorCode U_EXPORT2
uplug_getPlugLoadStatus(UPlugData *plug) {
return plug->pluginStatus;
}
/**
 * Initialize a plugin from an entrypoint and library - but don't load it.
 * @param entrypoint plugin entrypoint function
 * @param config configuration string, or NULL
 * @param lib library handle, or NULL
 * @param sym entrypoint symbol name, or NULL
 * @param status error code
 * @return the queried plugin record, or NULL on failure
 */
static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
                                                         UErrorCode *status) {
  UPlugData *plug = uplug_allocatePlug(entrypoint, config, lib, sym, status);

  if(U_SUCCESS(*status)) {
    return plug;
  }
  /* allocation itself may have failed, in which case there is no record to release */
  if(plug!=NULL) {
    uplug_deallocatePlug(plug, status);
  }
  return NULL;
}
/**
 * Create a plugin from an in-process entrypoint, query it and load it.
 * @param entrypoint plugin entrypoint function
 * @param config configuration string, or NULL
 * @param status error code
 * @return the plugin record, or NULL if it could not be initialized
 */
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) {
  UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status);
  if(plug!=NULL) {
    /* initialization returns NULL on failure; there is nothing to load then */
    uplug_loadPlug(plug, status);
  }
  return plug;
}
#if U_ENABLE_DYLOAD
/**
 * Create a placeholder record describing a plugin that could not be loaded.
 * The record is marked as not awaiting load and not unloadable, and carries
 * loadStatus as its plugin status.
 * @param libName library name, or NULL
 * @param sym entrypoint symbol name, or NULL
 * @param config configuration string, or NULL
 * @param nameOrError text stored as the plugin name, or NULL
 * @param loadStatus status to record in the plugin
 * @param status error code for the allocation itself
 * @return the placeholder record, or NULL if none could be allocated
 */
static UPlugData*
uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status)
{
  UPlugData *errPlug = uplug_allocateEmptyPlug(status);
  if(U_FAILURE(*status)) {
    return NULL;
  }

  errPlug->dontUnload = TRUE;    /* cannot unload. */
  errPlug->awaitingLoad = FALSE; /* Won't load. */
  errPlug->pluginStatus = loadStatus;

  if(libName!=NULL) {
    uprv_strncpy(errPlug->libName, libName, UPLUG_NAME_MAX);
  }
  if(sym!=NULL) {
    uprv_strncpy(errPlug->sym, sym, UPLUG_NAME_MAX);
  }
  if(config!=NULL) {
    uprv_strncpy(errPlug->config, config, UPLUG_NAME_MAX);
  }
  if(nameOrError!=NULL) {
    uprv_strncpy(errPlug->name, nameOrError, UPLUG_NAME_MAX);
  }

  return errPlug;
}
/**
* Open a library, look up the plugin entrypoint symbol in it, and initialize
* (query) a plugin from it - but don't load it.
* If the library cannot be opened or the symbol cannot be found, a placeholder
* "error" plugin record describing the failure is created instead.
* @param libName name of the library to open
* @param sym name of the entrypoint symbol
* @param config configuration string, or NULL
* @param status error code
* @return the plugin (or error placeholder) record; may be NULL
*/
static UPlugData*
uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
void *lib = NULL;
UPlugData *plug = NULL;
if(U_FAILURE(*status)) { return NULL; }
lib = uplug_openLibrary(libName, status);
if(lib!=NULL && U_SUCCESS(*status)) {
UPlugEntrypoint *entrypoint = NULL;
entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status);
if(entrypoint!=NULL&&U_SUCCESS(*status)) {
plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status);
if(plug!=NULL&&U_SUCCESS(*status)) {
plug->lib = lib; /* plug takes ownership of library */
lib = NULL; /* library is now owned by plugin. */
}
} else {
/* symbol lookup failed: record the failure; the placeholder's own allocation status is ignored */
UErrorCode subStatus = U_ZERO_ERROR;
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
}
/* lib is still non-NULL here unless a plugin took ownership above */
if(lib!=NULL) { /* still need to close the lib */
UErrorCode subStatus = U_ZERO_ERROR;
uplug_closeLibrary(lib, &subStatus); /* don't care here */
}
} else {
/* library could not be opened: record the failure as a placeholder plugin */
UErrorCode subStatus = U_ZERO_ERROR;
plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus);
}
return plug;
}
/**
 * Open a library, initialize the plugin found at the given symbol, and load it.
 * @param libName name of the library to open
 * @param sym name of the entrypoint symbol
 * @param config configuration string, or NULL
 * @param status error code
 * @return the plugin (or error placeholder) record, or NULL if none could be created
 */
U_CAPI UPlugData* U_EXPORT2
uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) {
  UPlugData *plug = NULL;
  if(U_FAILURE(*status)) { return NULL; }
  plug = uplug_initPlugFromLibrary(libName, sym, config, status);
  if(plug!=NULL) {
    /* initialization can return NULL; loading would dereference it */
    uplug_loadPlug(plug, status);
  }
  return plug;
}
#endif
/**
 * Report the current plugin level of the library.
 * @return UPLUG_LEVEL_HIGH once cmemory_inUse() reports true, otherwise UPLUG_LEVEL_LOW
 */
U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() {
  return cmemory_inUse() ? UPLUG_LEVEL_HIGH : UPLUG_LEVEL_LOW;
}
/**
 * Library cleanup hook: unload and deallocate every plugin in the table.
 * @return TRUE always
 */
static UBool U_CALLCONV uplug_cleanup(void)
{
  int32_t i = 0;

  /* cleanup plugs */
  while(i<pluginCount) {
    UErrorCode subStatus = U_ZERO_ERROR;
    int32_t countBefore = pluginCount;
    /* unload and deallocate */
    uplug_doUnloadPlug(&pluginList[i], &subStatus);
    /* A successful deallocation removes an entry and shifts the rest down,
       so slot i now holds a plugin not yet visited; advancing unconditionally
       skipped every other plugin. Only step forward when the entry was kept. */
    if(pluginCount >= countBefore) {
      i++;
    }
  }
  /* close other held libs? */
  return TRUE;
}
#if U_ENABLE_DYLOAD
/**
 * Load every plugin that is still awaiting load, in two passes:
 * first the UPLUG_LEVEL_LOW plugins, then everything else.
 * Per-plugin load errors go to a local status and are recorded in the
 * plugin's pluginStatus; they are not propagated through *status.
 * @param status error code; the function does nothing if it already indicates failure
 */
static void uplug_loadWaitingPlugs(UErrorCode *status) {
int32_t i;
UPlugLevel currentLevel = uplug_getCurrentLevel();
if(U_FAILURE(*status)) {
return;
}
#if UPLUG_TRACE
DBG((stderr, "uplug_loadWaitingPlugs() Level: %d\n", currentLevel));
#endif
/* pass #1: low level plugs */
for(i=0;i<pluginCount;i++) {
UErrorCode subStatus = U_ZERO_ERROR;
UPlugData *pluginToLoad = &pluginList[i];
if(pluginToLoad->awaitingLoad) {
if(pluginToLoad->level == UPLUG_LEVEL_LOW) {
if(currentLevel > UPLUG_LEVEL_LOW) {
/* the library is already past the low level: too late for this plugin */
pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH;
} else {
UPlugLevel newLevel;
uplug_loadPlug(pluginToLoad, &subStatus);
/* re-check the level: loading this plugin may have raised it */
newLevel = uplug_getCurrentLevel();
if(newLevel > currentLevel) {
pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING;
currentLevel = newLevel;
}
}
pluginToLoad->awaitingLoad = FALSE;
}
}
}
/* pass #2: all remaining plugs that are still awaiting load */
for(i=0;i<pluginCount;i++) {
UErrorCode subStatus = U_ZERO_ERROR;
UPlugData *pluginToLoad = &pluginList[i];
if(pluginToLoad->awaitingLoad) {
if(pluginToLoad->level == UPLUG_LEVEL_INVALID) {
pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL;
} else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) {
pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR;
} else {
uplug_loadPlug(pluginToLoad, &subStatus);
}
pluginToLoad->awaitingLoad = FALSE;
}
}
#if UPLUG_TRACE
DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel()));
#endif
}
/* Name of the plugin config file */
static char plugin_file[2048] = "";
#endif
/**
 * Get the name of the plugin configuration file.
 * @return the plugin_file buffer when dynamic loading is enabled (an empty
 *         string until uplug_init has filled it in), otherwise NULL
 */
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile() {
#if U_ENABLE_DYLOAD
return plugin_file;
#else
return NULL;
#endif
}
/**
 * Initialize the plugin system.
 *
 * When dynamic loading is enabled, the plugin directory is taken from the
 * ICU_PLUGINS environment variable (or DEFAULT_ICU_PLUGINS if defined and the
 * variable is unset/empty). The file "icuplugins" U_ICU_VERSION_SHORT ".txt"
 * in that directory is read line by line; each non-comment line has the form
 *     libName  symName  [config...]
 * and is turned into a plugin via uplug_initPlugFromLibrary(). Afterwards all
 * waiting plugins are loaded. In every configuration the cleanup function is
 * registered at the end.
 *
 * @param status error code; the first per-line failure is reported here
 */
U_CAPI void U_EXPORT2
uplug_init(UErrorCode *status) {
#if !U_ENABLE_DYLOAD
  (void)status; /* unused */
#else
  const char *plugin_dir;

  if(U_FAILURE(*status)) return;
  plugin_dir = getenv("ICU_PLUGINS");

#if defined(DEFAULT_ICU_PLUGINS)
  if(plugin_dir == NULL || !*plugin_dir) {
    plugin_dir = DEFAULT_ICU_PLUGINS;
  }
#endif

#if UPLUG_TRACE
  DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir));
#endif

  if(plugin_dir != NULL && *plugin_dir) {
    FILE *f;

#ifdef OS390BATCH
/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */
/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */
/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */
/* System Services. Alternative techniques might be allocating a member in */
/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */
/* DDNAME can be connected to a file in the HFS if need be. */

    uprv_strncpy(plugin_file,"//DD:ICUPLUG", 2047); /* JAM 20 Oct 2011 */
#else
    /* The directory comes from the environment and may be arbitrarily long.
       strncat's count is the maximum number of characters to APPEND, not the
       buffer size, so every append is bounded by the space actually left. */
    uprv_strncpy(plugin_file, plugin_dir, sizeof(plugin_file)-1);
    plugin_file[sizeof(plugin_file)-1] = 0;
    uprv_strncat(plugin_file, U_FILE_SEP_STRING, sizeof(plugin_file)-1-strlen(plugin_file));
    uprv_strncat(plugin_file, "icuplugins", sizeof(plugin_file)-1-strlen(plugin_file));
    uprv_strncat(plugin_file, U_ICU_VERSION_SHORT, sizeof(plugin_file)-1-strlen(plugin_file));
    uprv_strncat(plugin_file, ".txt", sizeof(plugin_file)-1-strlen(plugin_file));
#endif

#if UPLUG_TRACE
    DBG((stderr, "pluginfile= %s\n", plugin_file));
#endif

#ifdef __MVS__
    if (iscics()) /* 12 Nov 2011 JAM */
    {
      f = NULL;
    }
    else
#endif
    {
      f = fopen(plugin_file, "r");
    }

    if(f != NULL) {
      char linebuf[1024];
      char *p, *libName=NULL, *symName=NULL, *config=NULL;
      int32_t line = 0;

      while(fgets(linebuf,1023,f)) {
        line++;

        /* config points into linebuf; forget the previous line's value so a
           line without a config does not inherit a stale pointer */
        config = NULL;

        if(!*linebuf || *linebuf=='#') {
          continue;
        } else {
          /* isspace() requires a value representable as unsigned char */
          p = linebuf;
          while(*p&&isspace((unsigned char)*p))
            p++;
          if(!*p || *p=='#') continue;
          libName = p;
          while(*p&&!isspace((unsigned char)*p)) {
            p++;
          }
          if(!*p || *p=='#') continue; /* no tab after libname */
          *p=0; /* end of libname */
          p++;
          while(*p&&isspace((unsigned char)*p)) {
            p++;
          }
          if(!*p||*p=='#') continue; /* no symname after libname +tab */
          symName = p;
          while(*p&&!isspace((unsigned char)*p)) {
            p++;
          }

          if(*p) { /* has config */
            *p=0;
            ++p;
            while(*p&&isspace((unsigned char)*p)) {
              p++;
            }
            if(*p) {
              config = p;
            }
          }

          /* chop whitespace at the end of the config */
          if(config!=NULL&&*config!=0) {
            p = config+strlen(config);
            while(p>config&&isspace((unsigned char)*(--p))) {
              *p=0;
            }
          }

          /* OK, we're good. */
          {
            UErrorCode subStatus = U_ZERO_ERROR;
            UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus);
            /* report only the first failure to the caller */
            if(U_FAILURE(subStatus) && U_SUCCESS(*status)) {
              *status = subStatus;
            }
#if UPLUG_TRACE
            DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config));
            DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus)));
#else
            (void)plug; /* unused */
#endif
          }
        }
      }
      fclose(f);
    } else {
#if UPLUG_TRACE
      DBG((stderr, "Can't open plugin file %s\n", plugin_file));
#endif
    }
  }
  uplug_loadWaitingPlugs(status);
#endif /* U_ENABLE_DYLOAD */
  ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup);
}

Просмотреть файл

@ -0,0 +1,87 @@
/*
******************************************************************************
*
* Copyright (C) 2009-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : icuplugimp.h
*
* Internal functions for the ICU plugin system
*
* Date Name Description
* 10/29/2009 sl New.
******************************************************************************
*/
#ifndef ICUPLUGIMP_H
#define ICUPLUGIMP_H
#include "unicode/icuplug.h"
/*========================*/
/** @{ Library Manipulation
*/
/**
* Open a library, adding a reference count if needed.
* @param libName library name to load
* @param status error code
* @return the library pointer, or NULL
* @internal internal use only
*/
U_INTERNAL void * U_EXPORT2
uplug_openLibrary(const char *libName, UErrorCode *status);
/**
* Release one reference to a library, closing it when its reference count reaches 0
* @param lib the library to close
* @param status error code
* @internal internal use only
*/
U_INTERNAL void U_EXPORT2
uplug_closeLibrary(void *lib, UErrorCode *status);
/**
* Get a library's name, or NULL if not found.
* @param lib the library pointer to look up
* @param status error code
* @return the library name, or NULL if not found.
* @internal internal use only
*/
U_INTERNAL char * U_EXPORT2
uplug_findLibrary(void *lib, UErrorCode *status);
/** @} */
/*========================*/
/** @{ ICU Plugin internal interfaces
*/
/**
* Initialize the plugins
* @param status error result
* @internal - Internal use only.
*/
U_INTERNAL void U_EXPORT2
uplug_init(UErrorCode *status);
/**
* Get raw plug N
* @internal - Internal use only
*/
U_INTERNAL UPlugData* U_EXPORT2
uplug_getPlugInternal(int32_t n);
/**
* Get the name of the plugin file.
* @internal - Internal use only.
*/
U_INTERNAL const char* U_EXPORT2
uplug_getPluginFile(void);
/** @} */
#endif

Просмотреть файл

@ -0,0 +1,325 @@
/*
*******************************************************************************
*
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformatter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2012aug27
* created by: Umesh P. Nair
*/
#include "unicode/listformatter.h"
#include "mutex.h"
#include "hash.h"
#include "cstring.h"
#include "ulocimp.h"
#include "charstr.h"
#include "ucln_cmn.h"
U_NAMESPACE_BEGIN
// Lazily created table mapping locale names to ListFormatData; guarded by listFormatterMutex during creation.
static Hashtable* listPatternHash = NULL;
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
static UChar FIRST_PARAMETER[] = { 0x7b, 0x30, 0x7d }; // "{0}"
static UChar SECOND_PARAMETER[] = { 0x7b, 0x31, 0x7d }; // "{1}"
U_CDECL_BEGIN
// Cleanup hook: destroys the list pattern table and resets the pointer so
// that it is rebuilt on next use. Always returns TRUE.
static UBool U_CALLCONV uprv_listformatter_cleanup() {
delete listPatternHash;
listPatternHash = NULL;
return TRUE;
}
// Value deleter installed on listPatternHash: obj is a ListFormatData*
// previously stored by addDataToHash().
static void U_CALLCONV
uprv_deleteListFormatData(void *obj) {
delete static_cast<ListFormatData *>(obj);
}
U_CDECL_END
// Creates listPatternHash, registers the cleanup hook, and fills the table
// with the hard-coded list patterns. The arguments of each addDataToHash()
// call are: locale name, two-item pattern, start pattern, middle pattern,
// end pattern. The patterns contain "\\uXXXX" escapes that addDataToHash()
// unescapes. Once errorCode is a failure the remaining addDataToHash() calls
// do nothing, so the table may be left partially filled.
void ListFormatter::initializeHash(UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
listPatternHash = new Hashtable();
if (listPatternHash == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
listPatternHash->setValueDeleter(uprv_deleteListFormatData);
ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
addDataToHash("af", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
addDataToHash("am", "{0} \\u12a5\\u1293 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u12a5\\u1293 {1}", errorCode);
addDataToHash("ar", "{0} \\u0648 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}", "{0}\\u060c \\u0648 {1}", errorCode);
addDataToHash("bg", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
addDataToHash("bn", "{0} \\u098f\\u09ac\\u0982 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u098f\\u09ac\\u0982 {1}", errorCode);
addDataToHash("bs", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
addDataToHash("ca", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
addDataToHash("cs", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
addDataToHash("da", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
addDataToHash("de", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
addDataToHash("ee", "{0} kple {1}", "{0}, {1}", "{0}, {1}", "{0}, kple {1}", errorCode);
addDataToHash("el", "{0} \\u03ba\\u03b1\\u03b9 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u03ba\\u03b1\\u03b9 {1}", errorCode);
addDataToHash("en", "{0} and {1}", "{0}, {1}", "{0}, {1}", "{0}, and {1}", errorCode);
addDataToHash("es", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
addDataToHash("et", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
addDataToHash("eu", "{0} eta {1}", "{0}, {1}", "{0}, {1}", "{0} eta {1}", errorCode);
addDataToHash("fa", "{0} \\u0648 {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c \\u0648 {1}", errorCode);
addDataToHash("fi", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
addDataToHash("fil", "{0} at {1}", "{0}, {1}", "{0}, {1}", "{0} at {1}", errorCode);
addDataToHash("fo", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
addDataToHash("fr", "{0} et {1}", "{0}, {1}", "{0}, {1}", "{0} et {1}", errorCode);
addDataToHash("fur", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
addDataToHash("gd", "{0} agus {1}", "{0}, {1}", "{0}, {1}", "{0}, agus {1}", errorCode);
addDataToHash("gl", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
addDataToHash("gsw", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
addDataToHash("gu", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", errorCode);
addDataToHash("he", "{0} \\u05d5-{1}", "{0}, {1}", "{0}, {1}", "{0} \\u05d5-{1}", errorCode);
addDataToHash("hi", "{0} \\u0914\\u0930 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u0914\\u0930 {1}", errorCode);
addDataToHash("hr", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
addDataToHash("hu", "{0} \\u00e9s {1}", "{0}, {1}", "{0}, {1}", "{0} \\u00e9s {1}", errorCode);
addDataToHash("id", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
addDataToHash("is", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
addDataToHash("it", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0}, e {1}", errorCode);
addDataToHash("ja", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", errorCode);
addDataToHash("ka", "{0} \\u10d3\\u10d0 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u10d3\\u10d0 {1}", errorCode);
addDataToHash("kea", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
addDataToHash("kl", "{0} aamma {1}", "{0} aamma {1}", "{0}, {1}", "{0}, {1}", errorCode);
addDataToHash("kn", "{0} \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", "{0}, {1}", "{0}, {1}",
"{0}, \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", errorCode);
addDataToHash("ko", "{0} \\ubc0f {1}", "{0}, {1}", "{0}, {1}", "{0} \\ubc0f {1}", errorCode);
addDataToHash("ksh", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
addDataToHash("lt", "{0} ir {1}", "{0}, {1}", "{0}, {1}", "{0} ir {1}", errorCode);
addDataToHash("lv", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
addDataToHash("ml", "{0} \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 {1}", "{0}, {1}", "{0}, {1}",
"{0}, {1} \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", errorCode);
addDataToHash("mr", "{0} \\u0906\\u0923\\u093f {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0906\\u0923\\u093f {1}", errorCode);
addDataToHash("ms", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
addDataToHash("nb", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
addDataToHash("nl", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
addDataToHash("nn", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
addDataToHash("pl", "{0} i {1}", "{0}; {1}", "{0}; {1}", "{0} i {1}", errorCode);
addDataToHash("pt", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
addDataToHash("ro", "{0} \\u015fi {1}", "{0}, {1}", "{0}, {1}", "{0} \\u015fi {1}", errorCode);
addDataToHash("", "{0}, {1}", "{0}, {1}", "{0}, {1}", "{0}, {1}", errorCode); // root
addDataToHash("ru", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
addDataToHash("se", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
addDataToHash("sk", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
addDataToHash("sl", "{0} in {1}", "{0}, {1}", "{0}, {1}", "{0} in {1}", errorCode);
addDataToHash("sr", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
addDataToHash("sr_Cyrl", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
addDataToHash("sr_Latn", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
addDataToHash("sv", "{0} och {1}", "{0}, {1}", "{0}, {1}", "{0} och {1}", errorCode);
addDataToHash("sw", "{0} na {1}", "{0}, {1}", "{0}, {1}", "{0}, na {1}", errorCode);
addDataToHash("ta", "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", "{0}, {1}", "{0}, {1}",
"{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", errorCode);
addDataToHash("te", "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", "{0}, {1}", "{0}, {1}",
"{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", errorCode);
addDataToHash("th", "{0}\\u0e41\\u0e25\\u0e30{1}", "{0} {1}", "{0} {1}", "{0} \\u0e41\\u0e25\\u0e30{1}", errorCode);
addDataToHash("tr", "{0} ve {1}", "{0}, {1}", "{0}, {1}", "{0} ve {1}", errorCode);
addDataToHash("uk", "{0} \\u0442\\u0430 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0442\\u0430 {1}", errorCode);
addDataToHash("ur", "{0} \\u0627\\u0648\\u0631 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}",
"{0}\\u060c \\u0627\\u0648\\u0631 {1}", errorCode);
addDataToHash("vi", "{0} v\\u00e0 {1}", "{0}, {1}", "{0}, {1}", "{0} v\\u00e0 {1}", errorCode);
addDataToHash("wae", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
addDataToHash("zh", "{0}\\u548c{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u548c{1}", errorCode);
addDataToHash("zu", "I-{0} ne-{1}", "{0}, {1}", "{0}, {1}", "{0}, no-{1}", errorCode);
}
void ListFormatter::addDataToHash(
const char* locale,
const char* two,
const char* start,
const char* middle,
const char* end,
UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
UnicodeString key(locale, -1, US_INV);
ListFormatData* value = new ListFormatData(
UnicodeString(two, -1, US_INV).unescape(),
UnicodeString(start, -1, US_INV).unescape(),
UnicodeString(middle, -1, US_INV).unescape(),
UnicodeString(end, -1, US_INV).unescape());
if (value == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
listPatternHash->put(key, value, errorCode);
}
// Looks up the list patterns stored for exactly this locale name (no
// fallback). The pattern table is created on first use under
// listFormatterMutex.
// Returns NULL if errorCode is (or becomes) a failure, or if the table has no
// entry for the locale.
const ListFormatData* ListFormatter::getListFormatData(
        const Locale& locale, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return NULL;
    }
    {
        Mutex m(&listFormatterMutex);
        if (listPatternHash == NULL) {
            initializeHash(errorCode);
            if (U_FAILURE(errorCode)) {
                // A failed initialization can leave a partially filled table
                // behind. Discard it while still holding the mutex so that
                // later calls retry instead of treating it as complete.
                delete listPatternHash;
                listPatternHash = NULL;
                return NULL;
            }
        }
    }
    UnicodeString key(locale.getName(), -1, US_INV);
    return static_cast<const ListFormatData*>(listPatternHash->get(key));
}
// Creates a ListFormatter for a default-constructed Locale (the default locale).
ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
    return createInstance(Locale(), errorCode);
}
// Creates a ListFormatter for the given locale, walking the fallback chain
// produced by getFallbackLocale() until a locale with stored patterns is
// found; a bogus fallback locale restarts the search at the root locale.
// Returns NULL with errorCode set on failure; the caller owns the result.
ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
    Locale candidate(locale);
    while (true) {
        const ListFormatData* patterns = getListFormatData(candidate, errorCode);
        if (U_FAILURE(errorCode)) {
            return NULL;
        }
        if (patterns != NULL) {
            ListFormatter* formatter = new ListFormatter(*patterns);
            if (formatter == NULL) {
                errorCode = U_MEMORY_ALLOCATION_ERROR;
            }
            return formatter;
        }
        // No data for this locale: try its fallback next.
        errorCode = U_ZERO_ERROR;
        Locale parent;
        getFallbackLocale(candidate, parent, errorCode);
        if (U_FAILURE(errorCode)) {
            return NULL;
        }
        if (parent.isBogus()) {
            return createInstance(Locale::getRoot(), errorCode);
        }
        candidate = parent;
    }
}
// Constructs a formatter from a set of list patterns; `data` is initialized
// from listFormatterData.
ListFormatter::ListFormatter(const ListFormatData& listFormatterData) : data(listFormatterData) {
}
// Destructor; has no work of its own to do.
ListFormatter::~ListFormatter() {}
void ListFormatter::getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode) {
if (uprv_strcmp(in.getName(), "zh_TW") == 0) {
out = Locale::getTraditionalChinese();
} else {
const char* localeString = in.getName();
const char* extStart = locale_getKeywordsStart(localeString);
if (extStart == NULL) {
extStart = uprv_strchr(localeString, 0);
}
const char* last = extStart;
// TODO: Check whether uloc_getParent() will work here.
while (last > localeString && *(last - 1) != '_') {
--last;
}
// Truncate empty segment.
while (last > localeString) {
if (*(last-1) != '_') {
break;
}
--last;
}
size_t localePortionLen = last - localeString;
CharString fullLocale;
fullLocale.append(localeString, localePortionLen, errorCode).append(extStart, errorCode);
if (U_FAILURE(errorCode)) {
return;
}
out = Locale(fullLocale.data());
}
}
// Formats items[0..nItems) as a list and appends the result to appendTo.
// One item is appended as-is; two items use the two-item pattern; more use the
// start pattern, then the middle pattern for the inner items, then the end
// pattern. Nothing is appended when nItems <= 0 or on failure.
// Returns appendTo.
UnicodeString& ListFormatter::format(const UnicodeString items[], int32_t nItems,
        UnicodeString& appendTo, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode) || nItems <= 0) {
        return appendTo;
    }
    UnicodeString joined(items[0]);
    if (nItems == 2) {
        addNewString(data.twoPattern, joined, items[1], errorCode);
    } else if (nItems > 2) {
        addNewString(data.startPattern, joined, items[1], errorCode);
        const int32_t lastIndex = nItems - 1;
        for (int32_t idx = 2; idx < lastIndex; ++idx) {
            addNewString(data.middlePattern, joined, items[idx], errorCode);
        }
        addNewString(data.endPattern, joined, items[lastIndex], errorCode);
    }
    if (U_SUCCESS(errorCode)) {
        appendTo += joined;
    }
    return appendTo;
}
/**
* Joins originalString and nextString using the pattern pat and puts the result in
* originalString.
*/
void ListFormatter::addNewString(const UnicodeString& pat, UnicodeString& originalString,
const UnicodeString& nextString, UErrorCode& errorCode) const {
if (U_FAILURE(errorCode)) {
return;
}
int32_t p0Offset = pat.indexOf(FIRST_PARAMETER, 3, 0);
if (p0Offset < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
int32_t p1Offset = pat.indexOf(SECOND_PARAMETER, 3, 0);
if (p1Offset < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
int32_t i, j;
const UnicodeString* firstString;
const UnicodeString* secondString;
if (p0Offset < p1Offset) {
i = p0Offset;
j = p1Offset;
firstString = &originalString;
secondString = &nextString;
} else {
i = p1Offset;
j = p0Offset;
firstString = &nextString;
secondString = &originalString;
}
UnicodeString result = UnicodeString(pat, 0, i) + *firstString;
result += UnicodeString(pat, i+3, j-i-3);
result += *secondString;
result += UnicodeString(pat, j+3);
originalString = result;
}
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(ListFormatter)
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,25 @@
/*
***************************************************************************
* Copyright (C) 2006 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#ifndef LOCALSVC_H
#define LOCALSVC_H
#include "unicode/utypes.h"
#if U_LOCAL_SERVICE_HOOK
/**
* Prototype for user-supplied service hook. This function is expected to return
* a type of factory object specific to the requested service.
*
* @param what service-specific string identifying the specific user hook
* @param status error status
* @return a service-specific hook, or NULL on failure.
*/
U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status);
#endif
#endif

Просмотреть файл

@ -0,0 +1,187 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: locavailable.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for available locales, separated out from other .cpp files
* that then do not depend on resource bundle code and res_index bundles.
*/
#include "unicode/utypes.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "cmemory.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "uresimp.h"
// C++ API ----------------------------------------------------------------- ***
// Lazily built array of available locales and its length; released by locale_available_cleanup().
static icu::Locale* availableLocaleList = NULL;
static int32_t availableLocaleListCount;
U_CDECL_BEGIN
// Cleanup hook: frees the cached array of available locales and resets the
// count. Always returns TRUE.
static UBool U_CALLCONV locale_available_cleanup(void)
{
    U_NAMESPACE_USE

    // delete[] on a null pointer is a no-op, so no separate check is needed
    delete [] availableLocaleList;
    availableLocaleList = NULL;
    availableLocaleListCount = 0;

    return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
// Returns the cached array of available locales and stores its length in
// count. The array is built on first use from uloc_countAvailable() /
// uloc_getAvailable(). Returns NULL with count 0 if there are no locales or
// the array cannot be allocated.
const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count)
{
// for now, there is a hardcoded list, so just walk through that list and set it up.
UBool needInit;
UMTX_CHECK(NULL, availableLocaleList == NULL, needInit);
if (needInit) {
int32_t locCount = uloc_countAvailable();
Locale *newLocaleList = 0;
if(locCount) {
newLocaleList = new Locale[locCount];
}
if (newLocaleList == NULL) {
count = 0;
return NULL;
}
count = locCount;
// fill the new array back to front; locCount doubles as the index
while(--locCount >= 0) {
newLocaleList[locCount].setFromPOSIXID(uloc_getAvailable(locCount));
}
// publish under the global mutex; if another thread got there first, keep its list
umtx_lock(NULL);
if(availableLocaleList == 0) {
availableLocaleListCount = count;
availableLocaleList = newLocaleList;
newLocaleList = NULL;
ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup);
}
umtx_unlock(NULL);
// non-NULL only if this thread lost the race: discard the duplicate
delete []newLocaleList;
}
count = availableLocaleListCount;
return availableLocaleList;
}
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
the locale ResourceBundle data file.*/
static const char _kIndexLocaleName[] = "res_index";
static const char _kIndexTag[] = "InstalledLocales";
static char** _installedLocales = NULL;
static int32_t _installedLocalesCount = 0;
/* ### Get available **************************************************/
/* Cleanup hook: releases the cached installed-locale array and resets the
   count. The globals are cleared before the memory is freed. Always returns TRUE. */
static UBool U_CALLCONV uloc_cleanup(void) {
    char **stale = _installedLocales;

    if (stale == NULL) {
        return TRUE;
    }
    _installedLocales = NULL;
    _installedLocalesCount = 0;
    uprv_free(stale);
    return TRUE;
}
/* Builds the cached, NULL-terminated array of installed locale names from the
   "InstalledLocales" table of the "res_index" bundle, if it has not been
   built yet. On any failure the cache is simply left unset. */
static void _load_installedLocales()
{
UBool localesLoaded;
UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded);
if (localesLoaded == FALSE) {
UResourceBundle *indexLocale = NULL;
UResourceBundle installed;
UErrorCode status = U_ZERO_ERROR;
char ** temp;
int32_t i = 0;
int32_t localeCount;
ures_initStackObject(&installed);
indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status);
ures_getByKey(indexLocale, _kIndexTag, &installed, &status);
if(U_SUCCESS(status)) {
localeCount = ures_getSize(&installed);
/* one extra slot for the terminating NULL */
temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
/* Check for null pointer */
if (temp != NULL) {
ures_resetIterator(&installed);
/* only the key of each item is stored (third argument) */
/* NOTE(review): status from ures_getNextString is not checked here */
while(ures_hasNext(&installed)) {
ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
}
temp[i] = NULL;
/* publish under the global mutex; if another thread got there first, keep its array */
umtx_lock(NULL);
if (_installedLocales == NULL)
{
_installedLocalesCount = localeCount;
_installedLocales = temp;
temp = NULL;
ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
}
umtx_unlock(NULL);
/* non-NULL only if this thread lost the race */
uprv_free(temp);
}
}
ures_close(&installed);
ures_close(indexLocale);
}
}
/**
 * Returns the installed locale ID at the given index.
 *
 * @param offset zero-based index into the installed-locales list
 * @return the locale ID, or NULL when offset is negative, is not less than
 *         uloc_countAvailable(), or the installed-locales list could not be
 *         loaded.
 */
U_CAPI const char* U_EXPORT2
uloc_getAvailable(int32_t offset)
{
    _load_installedLocales();

    /*
     * The list stays NULL (with a count of 0) when loading failed, so it must
     * be checked before indexing; a negative offset would read before the
     * array. offset == count previously returned the NULL terminator, so
     * returning NULL for it keeps the old result.
     */
    if (_installedLocales == NULL || offset < 0 || offset >= _installedLocalesCount) {
        return NULL;
    }
    return _installedLocales[offset];
}
/**
 * Returns the number of installed locale IDs, loading the list on first use.
 * The result is 0 when the list could not be loaded.
 */
U_CAPI int32_t U_EXPORT2
uloc_countAvailable()
{
_load_installedLocales();
return _installedLocalesCount;
}

Просмотреть файл

@ -0,0 +1,46 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#include "locbased.h"
#include "cstring.h"
U_NAMESPACE_BEGIN
/*
 * Returns the requested locale as a Locale object. When getLocaleID() yields
 * no ID (error or unset pointer), a Locale built from "" is returned.
 */
Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    const char* localeID = getLocaleID(type, status);
    if (localeID == 0) {
        localeID = "";
    }
    return Locale(localeID);
}
/*
 * Returns the aliased valid or actual locale ID string.
 * Returns NULL without touching status when status already indicates failure;
 * sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL for any other type value.
 */
const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (type == ULOC_VALID_LOCALE) {
        return valid;
    }
    if (type == ULOC_ACTUAL_LOCALE) {
        return actual;
    }
    /* Only the valid and actual locale types are supported. */
    status = U_ILLEGAL_ARGUMENT_ERROR;
    return NULL;
}
/*
 * Copies the given IDs into the aliased valid/actual buffers. A zero
 * argument leaves the corresponding buffer untouched.
 * NOTE(review): the copies are unbounded - this assumes the aliased buffers
 * are non-NULL and large enough for the incoming IDs; confirm at call sites.
 */
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != 0) {
uprv_strcpy(valid, validID);
}
if (actualID != 0) {
uprv_strcpy(actual, actualID);
}
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,97 @@
/*
**********************************************************************
* Copyright (c) 2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: January 16 2004
* Since: ICU 2.8
**********************************************************************
*/
#ifndef LOCBASED_H
#define LOCBASED_H
#include "unicode/locid.h"
#include "unicode/uobject.h"
/**
* Macro to declare a LocaleBased wrapper object named `varname' for the
* given object, which must have two members named `validLocale' and
* `actualLocale'. The expansion already ends with a semicolon.
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale);
U_NAMESPACE_BEGIN
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
* ICU 3.0 time frame in favor of an integrated approach with the
* services framework.
*
* The class stores only the two pointers it is given; it declares no
* destructor and does not copy the strings they point to.
* @since ICU 2.8
*/
class U_COMMON_API LocaleBased : public UMemory {
public:
/**
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
*/
inline LocaleBased(char* validAlias, char* actualAlias);
/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
/**
* Return locale meta-data for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale
*/
Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Return the locale ID for the service object wrapped by this
* object. Either the valid or the actual locale may be
* retrieved.
* @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE
* @param status input-output error code
* @return the indicated locale ID
*/
const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
/**
* Set the locale meta-data for the service object wrapped by this
* object. If either parameter is zero, it is ignored.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual);
private:
// Aliased buffer holding the valid locale ID (set by the constructors).
char* valid;
// Aliased buffer holding the actual locale ID (set by the constructors).
char* actual;
};
// Stores both pointers as aliases; nothing is copied.
inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
valid(validAlias), actual(actualAlias) {
}
// Const overload: stores both pointers as aliases after dropping constness.
// NOTE(review): setLocaleIDs() writes through these pointers, so an object
// built from const buffers is presumably meant for read-only use - confirm.
inline LocaleBased::LocaleBased(const char* validAlias,
const char* actualAlias) :
// ugh: cast away const
valid((char*)validAlias), actual((char*)actualAlias) {
}
U_NAMESPACE_END
#endif

Просмотреть файл

@ -0,0 +1,846 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: locdispnames.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for locale display names, separated out from other .cpp files
* that then do not depend on resource bundle code and display name data.
*/
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "ulocimp.h"
#include "uresimp.h"
#include "ureslocs.h"
#include "ustr_imp.h"
// C++ API ----------------------------------------------------------------- ***
U_NAMESPACE_BEGIN
/*
 * Writes this locale's language display name into dispLang, using
 * getDefault() as the display locale, and returns dispLang.
 */
UnicodeString&
Locale::getDisplayLanguage(UnicodeString& dispLang) const
{
return this->getDisplayLanguage(getDefault(), dispLang);
}
/* We cannot make any assumptions about the size of the output display strings.
* Yet, since we are calling through to a C API, we need to set limits on
* buffer size. For all the following getDisplay functions we first attempt
* to fill the result string's own buffer at ULOC_FULLNAME_CAPACITY. If it is
* too small, we request a buffer of the exact length reported and call the C API again. */
/*
 * Writes this locale's language display name, localized for displayLocale,
 * into result and returns result. The string is left empty when a buffer
 * cannot be obtained or the C API reports a failure.
 */
UnicodeString&
Locale::getDisplayLanguage(const Locale &displayLocale,
                           UnicodeString &result) const {
    UErrorCode status = U_ZERO_ERROR;

    /* First pass: try with the default-sized buffer. */
    UChar *dest = result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    int32_t needed = uloc_getDisplayLanguage(fullName, displayLocale.fullName,
                                             dest, result.getCapacity(),
                                             &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    /* Second pass: the first call reported the required length. */
    dest = result.getBuffer(needed);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    status = U_ZERO_ERROR;
    needed = uloc_getDisplayLanguage(fullName, displayLocale.fullName,
                                     dest, result.getCapacity(),
                                     &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    return result;
}
/*
 * Writes this locale's script display name into dispScript, using
 * getDefault() as the display locale, and returns dispScript.
 */
UnicodeString&
Locale::getDisplayScript(UnicodeString& dispScript) const
{
return this->getDisplayScript(getDefault(), dispScript);
}
/*
 * Writes this locale's script display name, localized for displayLocale,
 * into result and returns result. The string is left empty when a buffer
 * cannot be obtained or the C API reports a failure.
 */
UnicodeString&
Locale::getDisplayScript(const Locale &displayLocale,
                         UnicodeString &result) const {
    UErrorCode status = U_ZERO_ERROR;

    /* First pass: try with the default-sized buffer. */
    UChar *dest = result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    int32_t needed = uloc_getDisplayScript(fullName, displayLocale.fullName,
                                           dest, result.getCapacity(),
                                           &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    /* Second pass: the first call reported the required length. */
    dest = result.getBuffer(needed);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    status = U_ZERO_ERROR;
    needed = uloc_getDisplayScript(fullName, displayLocale.fullName,
                                   dest, result.getCapacity(),
                                   &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    return result;
}
/*
 * Writes this locale's country display name into dispCntry, using
 * getDefault() as the display locale, and returns dispCntry.
 */
UnicodeString&
Locale::getDisplayCountry(UnicodeString& dispCntry) const
{
return this->getDisplayCountry(getDefault(), dispCntry);
}
/*
 * Writes this locale's country display name, localized for displayLocale,
 * into result and returns result. The string is left empty when a buffer
 * cannot be obtained or the C API reports a failure.
 */
UnicodeString&
Locale::getDisplayCountry(const Locale &displayLocale,
                          UnicodeString &result) const {
    UErrorCode status = U_ZERO_ERROR;

    /* First pass: try with the default-sized buffer. */
    UChar *dest = result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    int32_t needed = uloc_getDisplayCountry(fullName, displayLocale.fullName,
                                            dest, result.getCapacity(),
                                            &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    /* Second pass: the first call reported the required length. */
    dest = result.getBuffer(needed);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    status = U_ZERO_ERROR;
    needed = uloc_getDisplayCountry(fullName, displayLocale.fullName,
                                    dest, result.getCapacity(),
                                    &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    return result;
}
/*
 * Writes this locale's variant display name into dispVar, using
 * getDefault() as the display locale, and returns dispVar.
 */
UnicodeString&
Locale::getDisplayVariant(UnicodeString& dispVar) const
{
return this->getDisplayVariant(getDefault(), dispVar);
}
/*
 * Writes this locale's variant display name, localized for displayLocale,
 * into result and returns result. The string is left empty when a buffer
 * cannot be obtained or the C API reports a failure.
 */
UnicodeString&
Locale::getDisplayVariant(const Locale &displayLocale,
                          UnicodeString &result) const {
    UErrorCode status = U_ZERO_ERROR;

    /* First pass: try with the default-sized buffer. */
    UChar *dest = result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    int32_t needed = uloc_getDisplayVariant(fullName, displayLocale.fullName,
                                            dest, result.getCapacity(),
                                            &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    /* Second pass: the first call reported the required length. */
    dest = result.getBuffer(needed);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    status = U_ZERO_ERROR;
    needed = uloc_getDisplayVariant(fullName, displayLocale.fullName,
                                    dest, result.getCapacity(),
                                    &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    return result;
}
/*
 * Writes this locale's full display name into name, using getDefault() as
 * the display locale, and returns name.
 */
UnicodeString&
Locale::getDisplayName( UnicodeString& name ) const
{
return this->getDisplayName(getDefault(), name);
}
/*
 * Writes this locale's full display name, localized for displayLocale,
 * into result and returns result. The string is left empty when a buffer
 * cannot be obtained or the C API reports a failure.
 */
UnicodeString&
Locale::getDisplayName(const Locale &displayLocale,
                       UnicodeString &result) const {
    UErrorCode status = U_ZERO_ERROR;

    /* First pass: try with the default-sized buffer. */
    UChar *dest = result.getBuffer(ULOC_FULLNAME_CAPACITY);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    int32_t needed = uloc_getDisplayName(fullName, displayLocale.fullName,
                                         dest, result.getCapacity(),
                                         &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    if (status != U_BUFFER_OVERFLOW_ERROR) {
        return result;
    }

    /* Second pass: the first call reported the required length. */
    dest = result.getBuffer(needed);
    if (dest == 0) {
        result.truncate(0);
        return result;
    }
    status = U_ZERO_ERROR;
    needed = uloc_getDisplayName(fullName, displayLocale.fullName,
                                 dest, result.getCapacity(),
                                 &status);
    result.releaseBuffer(U_FAILURE(status) ? 0 : needed);
    return result;
}
#if ! UCONFIG_NO_BREAK_ITERATION
// -------------------------------------
// Gets the objectLocale display name in the default locale language.
// Forwards to Locale::getDisplayName(UnicodeString&) and returns name.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
UnicodeString& name)
{
return objectLocale.getDisplayName(name);
}
// -------------------------------------
// Gets the objectLocale display name in the displayLocale language.
// Forwards to Locale::getDisplayName(const Locale&, UnicodeString&) and returns name.
UnicodeString& U_EXPORT2
BreakIterator::getDisplayName(const Locale& objectLocale,
const Locale& displayLocale,
UnicodeString& name)
{
return objectLocale.getDisplayName(displayLocale, name);
}
#endif
U_NAMESPACE_END
// C API ------------------------------------------------------------------- ***
U_NAMESPACE_USE
/* ### Constants **************************************************/
/* These strings describe the resources we attempt to load from
the locale ResourceBundle data file.*/
static const char _kLanguages[] = "Languages";
static const char _kScripts[] = "Scripts";
/* Tried first by uloc_getDisplayScript(), which falls back to _kScripts. */
static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
/* _getDisplayNameForComponent() compares its tag against this array by
   pointer to pick the region data tree, so callers must pass this object. */
static const char _kCountries[] = "Countries";
static const char _kVariants[] = "Variants";
static const char _kKeys[] = "Keys";
static const char _kTypes[] = "Types";
//static const char _kRootName[] = "root";
/* Keyword name that uloc_getDisplayKeywordValue() handles specially. */
static const char _kCurrency[] = "currency";
static const char _kCurrencies[] = "Currencies";
/* Table and item keys read by uloc_getDisplayName(). */
static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
static const char _kPattern[] = "pattern";
static const char _kSeparator[] = "separator";
/* ### Display name **************************************************/
/*
 * Looks up a display string and copies it into dest; when the lookup fails,
 * the invariant-character `substitute` is converted and copied instead and
 * *pErrorCode is set to U_USING_DEFAULT_WARNING.
 *
 * With itemKey == NULL, tableKey names a top-level string of the bundle.
 * Otherwise the item is fetched through uloc_getTableStringWithFallback().
 *
 * Returns the result of u_terminateUChars() for the full (untruncated) length.
 */
static int32_t
_getStringOrCopyKey(const char *path, const char *locale,
                    const char *tableKey,
                    const char* subTableKey,
                    const char *itemKey,
                    const char *substitute,
                    UChar *dest, int32_t destCapacity,
                    UErrorCode *pErrorCode) {
    const UChar *text = NULL;
    int32_t textLength = 0;

    if (itemKey != NULL) {
        /* Language code should not be a number. If it is, set the error code. */
        if (uprv_strncmp(tableKey, "Languages", 9) == 0 && uprv_strtol(itemKey, NULL, 10) != 0) {
            *pErrorCode = U_MISSING_RESOURCE_ERROR;
        } else {
            /* second-level item, use special fallback */
            text = uloc_getTableStringWithFallback(path, locale,
                                                   tableKey,
                                                   subTableKey,
                                                   itemKey,
                                                   &textLength,
                                                   pErrorCode);
        }
    } else {
        /* top-level item: normal resource bundle access */
        UResourceBundle *bundle = ures_open(path, locale, pErrorCode);
        if (U_SUCCESS(*pErrorCode)) {
            text = ures_getStringByKey(bundle, tableKey, &textLength, pErrorCode);
            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
            ures_close(bundle);
        }
    }

    if (U_FAILURE(*pErrorCode)) {
        /* no string from a resource bundle: convert the substitute */
        textLength = (int32_t)uprv_strlen(substitute);
        u_charsToUChars(substitute, dest, uprv_min(textLength, destCapacity));
        *pErrorCode = U_USING_DEFAULT_WARNING;
    } else {
        int32_t copyLength = uprv_min(textLength, destCapacity);
        if (text != NULL && copyLength > 0) {
            u_memcpy(dest, text, copyLength);
        }
    }

    return u_terminateUChars(dest, destCapacity, textLength, pErrorCode);
}
/* Signature shared by the uloc_getLanguage/Script/Country/Variant extractors. */
typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);

/*
 * Extracts one component of `locale` with `getter` and looks up its display
 * name in table `tag` for `displayLocale`.
 *
 * Returns 0 on bad arguments or when the component cannot be extracted
 * (U_ILLEGAL_ARGUMENT_ERROR); an empty component yields an empty string.
 * The region data tree is used when tag is the _kCountries array itself
 * (pointer comparison), the language tree otherwise.
 */
static int32_t
_getDisplayNameForComponent(const char *locale,
                            const char *displayLocale,
                            UChar *dest, int32_t destCapacity,
                            UDisplayNameGetter *getter,
                            const char *tag,
                            UErrorCode *pErrorCode) {
    char component[ULOC_FULLNAME_CAPACITY*4];

    /* argument checking */
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* The extractor gets its own status so the caller's warnings survive. */
    UErrorCode getterStatus = U_ZERO_ERROR;
    int32_t componentLength = (*getter)(locale, component, sizeof(component), &getterStatus);
    if (U_FAILURE(getterStatus) || getterStatus == U_STRING_NOT_TERMINATED_WARNING) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (componentLength == 0) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    const char *dataTree;
    if (tag == _kCountries) {
        dataTree = U_ICUDATA_REGION;
    } else {
        dataTree = U_ICUDATA_LANG;
    }

    /* The raw component doubles as the substitute text. */
    return _getStringOrCopyKey(dataTree, displayLocale,
                               tag, NULL, component,
                               component,
                               dest, destCapacity,
                               pErrorCode);
}
/*
 * Gets the display name of the language component of `locale`, localized
 * for `displayLocale`. Delegates to _getDisplayNameForComponent() with
 * uloc_getLanguage and the "Languages" table.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getLanguage, _kLanguages, pErrorCode);
}
/*
 * Gets the display name of the script component of `locale`, localized for
 * `displayLocale`. The "Scripts%stand-alone" table is tried first; when that
 * lookup only produced the substitute text (U_USING_DEFAULT_WARNING), the
 * regular "Scripts" table is used instead.
 *
 * @param locale        locale ID whose script is described
 * @param displayLocale locale ID whose data supplies the display string
 * @param dest          output buffer; may be NULL only when destCapacity is 0
 * @param destCapacity  capacity of dest in UChars
 * @param pErrorCode    in/out error code; the function returns 0 at once if
 *                      it is NULL or already indicates failure
 * @return the length of the display name
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayScript(const char* locale,
                      const char* displayLocale,
                      UChar *dest, int32_t destCapacity,
                      UErrorCode *pErrorCode)
{
    /*
     * Honor the incoming status like the sibling functions do: without this
     * check a NULL pointer was dereferenced below and an incoming failure
     * was silently replaced by the status of the stand-alone lookup.
     */
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* Local status, so the fallback decision does not depend on caller warnings. */
    UErrorCode err = U_ZERO_ERROR;
    int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                              uloc_getScript, _kScriptsStandAlone, &err);
    if (err == U_USING_DEFAULT_WARNING) {
        return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                                           uloc_getScript, _kScripts, pErrorCode);
    }
    *pErrorCode = err;
    return res;
}
/*
 * Gets the display name of the script component of `locale` from the
 * "Scripts" table only (no stand-alone lookup). Used by uloc_getDisplayName().
 */
U_INTERNAL int32_t U_EXPORT2
uloc_getDisplayScriptInContext(const char* locale,
const char* displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getScript, _kScripts, pErrorCode);
}
/*
 * Gets the display name of the country component of `locale`, localized
 * for `displayLocale`. Delegates to _getDisplayNameForComponent() with
 * uloc_getCountry and the "Countries" table.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getCountry, _kCountries, pErrorCode);
}
/*
* Gets the display name of the variant component of `locale`, localized for
* `displayLocale`. The whole variant string is looked up as a single key in
* the "Variants" table.
*
* TODO separate variant1_variant2_variant3...
* by getting each tag's display string and concatenating them with ", "
* in between - similar to uloc_getDisplayName()
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode) {
return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
uloc_getVariant, _kVariants, pErrorCode);
}
/* Instead of having a separate pass for 'special' patterns, reintegrate the two
* so we don't get bitten by preflight bugs again. We can be reasonably efficient
* without two separate code paths, this code isn't that performance-critical.
*
* This code is general enough to deal with patterns that have a prefix or swap the
* language and remainder components, since we gave developers enough rope to do such
* things if they futz with the pattern data. But since we don't give them a way to
* specify a pattern for arbitrary combinations of components, there's not much use in
* that. I don't think our data includes such patterns; the only variable I know of is
* whether there is a space before the open paren, or not. Oh, and zh uses different
* chars than the standard open/close paren (which ja and ko use, btw).
*/
/*
 * Formats a display name for `locale`, localized for `displayLocale`.
 * The language display name and the remaining components (script, country,
 * variant and keywords, joined by a separator) are substituted into the
 * "localeDisplayPattern" pattern of the display locale's bundle; "{0} ({1})"
 * and ", " are used when the bundle supplies none.
 *
 * @param locale        locale ID to describe
 * @param displayLocale locale ID whose data supplies the display strings
 * @param dest          output buffer; may be NULL only when destCapacity is 0
 * @param destCapacity  capacity of dest in UChars; must not be negative
 * @param pErrorCode    in/out error code; the function returns 0 at once if
 *                      it is NULL or already indicates failure
 * @return the value returned by u_terminateUChars() for the computed length
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char *locale,
const char *displayLocale,
UChar *dest, int32_t destCapacity,
UErrorCode *pErrorCode)
{
static const UChar defaultSeparator[3] = { 0x002c, 0x0020, 0x0000 }; /* comma + space */
static const int32_t defaultSepLen = 2;
static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */
static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */
static const int32_t subLen = 3;
static const UChar defaultPattern[10] = {
0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000
}; /* {0} ({1}) */
static const int32_t defaultPatLen = 9;
static const int32_t defaultSub0Pos = 0;
static const int32_t defaultSub1Pos = 5;
int32_t length; /* of formatted result */
const UChar *separator;
int32_t sepLen = 0;
const UChar *pattern;
int32_t patLen = 0;
int32_t sub0Pos, sub1Pos;
UBool haveLang = TRUE; /* assume true, set false if we find we don't have
a lang component in the locale */
UBool haveRest = TRUE; /* assume true, set false if we find we don't have
any other component in the locale */
UBool retry = FALSE; /* set true if we need to retry, see below */
int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
{
/* Local status: a failed lookup here only leaves sepLen/patLen at 0,
   which selects the defaults below. */
UErrorCode status = U_ZERO_ERROR;
UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status);
UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern,
NULL, &status);
separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, &status);
pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status);
ures_close(dspbundle);
ures_close(locbundle);
}
/* If we couldn't find any data, then use the defaults */
if(sepLen == 0) {
separator = defaultSeparator;
sepLen = defaultSepLen;
}
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
pattern=defaultPattern;
patLen=defaultPatLen;
sub0Pos=defaultSub0Pos;
sub1Pos=defaultSub1Pos;
} else { /* non-default pattern */
UChar *p0=u_strstr(pattern, sub0);
UChar *p1=u_strstr(pattern, sub1);
if (p0==NULL || p1==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
sub0Pos=p0-pattern;
sub1Pos=p1-pattern;
/* From here on sub0Pos is the position of whichever placeholder comes
   first in the pattern; langi records which one takes the language. */
if (sub1Pos < sub0Pos) { /* a very odd pattern */
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
langi=1;
}
}
/* We loop here because there is one case in which after the first pass we could need to
* reextract the data. If there's initial padding before the first element, we put in
* the padding and then write that element. If it turns out there's no second element,
* we didn't need the padding. If we do need the data (no preflight), and the first element
* would have fit but for the padding, we need to reextract. In this case (only) we
* adjust the parameters so padding is not added, and repeat.
*/
do {
UChar* p=dest;
int32_t patPos=0; /* position in the pattern, used for non-substitution portions */
int32_t langLen=0; /* length of language substitution */
int32_t langPos=0; /* position in output of language substitution */
int32_t restLen=0; /* length of 'everything else' substitution */
int32_t restPos=0; /* position in output of 'everything else' substitution */
UEnumeration* kenum = NULL; /* keyword enumeration */
/* prefix of pattern, extremely likely to be empty */
if(sub0Pos) {
if(destCapacity >= sub0Pos) {
while (patPos < sub0Pos) {
*p++ = pattern[patPos++];
}
} else {
patPos=sub0Pos;
}
length=sub0Pos;
} else {
length=0;
}
/* subi only advances once a substitution is finished (subdone); resti steps
   through script, country, variant and then one keyword per iteration. */
for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/
UBool subdone = FALSE; /* set true when ready to move to next substitution */
/* prep p and cap for calls to get display components, pin cap to 0 since
they complain if cap is negative */
int32_t cap=destCapacity-length;
if (cap <= 0) {
cap=0;
} else {
p=dest+length;
}
if (subi == langi) { /* {0}*/
if(haveLang) {
langPos=length;
langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode);
length+=langLen;
haveLang=langLen>0;
}
subdone=TRUE;
} else { /* {1} */
if(!haveRest) {
subdone=TRUE;
} else {
int32_t len; /* length of component (plus other stuff) we just fetched */
switch(resti++) {
case 0:
restPos=length;
len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode);
break;
case 1:
len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode);
break;
case 2:
len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode);
break;
case 3:
kenum = uloc_openKeywords(locale, pErrorCode);
/* fall through */
default: {
const char* kw=uenum_next(kenum, &len, pErrorCode);
if (kw == NULL) {
uenum_close(kenum);
len=0; /* mark that we didn't add a component */
subdone=TRUE;
} else {
/* incorporating this behavior into the loop made it even more complex,
so just special case it here */
len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode);
if(len) {
if(len < cap) {
p[len]=0x3d; /* '=', assume we'll need it */
}
len+=1;
/* adjust for call to get keyword */
cap-=len;
if(cap <= 0) {
cap=0;
} else {
p+=len;
}
}
/* reset for call below */
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR;
}
int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale,
p, cap, pErrorCode);
if(len) {
if(vlen==0) {
--len; /* remove unneeded '=' */
}
/* restore cap and p to what they were at start */
cap=destCapacity-length;
if(cap <= 0) {
cap=0;
} else {
p=dest+length;
}
}
len+=vlen; /* total we added for key + '=' + value */
}
} break;
} /* end switch */
if (len>0) {
/* we added a component, so add separator and write it if there's room. */
if(len+sepLen<=cap) {
p+=len;
for(int32_t i=0;i<sepLen;++i) {
*p++=separator[i];
}
}
length+=len+sepLen;
} else if(subdone) {
/* remove separator if we added it */
if (length!=restPos) {
length-=sepLen;
}
restLen=length-restPos;
haveRest=restLen>0;
}
}
}
/* Overflow is not an error while assembling: length keeps counting so the
   final u_terminateUChars() call sees the full required size. */
if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) {
*pErrorCode=U_ZERO_ERROR;
}
if(subdone) {
if(haveLang && haveRest) {
/* append internal portion of pattern, the first time,
or last portion of pattern the second time */
int32_t padLen;
patPos+=subLen;
padLen=(subi==0 ? sub1Pos : patLen)-patPos;
if(length+padLen < destCapacity) {
p=dest+length;
for(int32_t i=0;i<padLen;++i) {
*p++=pattern[patPos++];
}
} else {
patPos+=padLen;
}
length+=padLen;
} else if(subi==0) {
/* don't have first component, reset for second component */
sub0Pos=0;
length=0;
} else if(length>0) {
/* true length is the length of just the component we got. */
length=haveLang?langLen:restLen;
if(dest && sub0Pos!=0) {
if (sub0Pos+length<=destCapacity) {
/* first component not at start of result,
but we have full component in buffer. */
u_memmove(dest, dest+(haveLang?langPos:restPos), length);
} else {
/* would have fit, but didn't because of pattern prefix. */
sub0Pos=0; /* stops initial padding (and a second retry,
so we won't end up here again) */
retry=TRUE;
}
}
}
++subi; /* move on to next substitution */
}
}
} while(retry);
return u_terminateUChars(dest, destCapacity, length, pErrorCode);
}
/*
 * Gets the display name of a locale keyword (the key itself, not its value),
 * localized for `displayLocale`. Returns 0 when status is NULL or already a
 * failure, or (with U_ILLEGAL_ARGUMENT_ERROR) on an invalid dest/destCapacity.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeyword(const char* keyword,
const char* displayLocale,
UChar* dest,
int32_t destCapacity,
UErrorCode* status){
/* argument checking */
if(status==NULL || U_FAILURE(*status)) {
return 0;
}
if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/* no sub-table: the keyword is the item key in the "Keys" table and also
   the substitute text used when no resource is found */
return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
_kKeys, NULL,
keyword,
keyword,
dest, destCapacity,
status);
}
/* Index of the display name inside a currency's resource array. */
#define UCURRENCY_DISPLAY_NAME_INDEX 1

/*
 * Gets the display name of the value that `keyword` has in `locale`,
 * localized for `displayLocale`.
 *
 * For the "currency" keyword (compared case-insensitively) the name is read
 * from the currency data tree; when it is missing there, the raw keyword
 * value is copied out with U_USING_DEFAULT_WARNING. Every other keyword is
 * looked up in the "Types" table with the keyword as sub-table key.
 *
 * On the currency path, a result longer than destCapacity sets
 * U_BUFFER_OVERFLOW_ERROR and returns the required length without copying.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayKeywordValue( const char* locale,
                             const char* keyword,
                             const char* displayLocale,
                             UChar* dest,
                             int32_t destCapacity,
                             UErrorCode* status){
    char value[ULOC_FULLNAME_CAPACITY*4];
    int32_t valueCapacity = ULOC_FULLNAME_CAPACITY*4;

    /* argument checking */
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the keyword value */
    value[0] = 0;
    int32_t valueLength = uloc_getKeywordValue(locale, keyword, value, valueCapacity, status);

    if (uprv_stricmp(keyword, _kCurrency) != 0) {
        /* ordinary keyword: table lookup with the raw value as substitute */
        return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale,
                                   _kTypes, keyword,
                                   value,
                                   value,
                                   dest, destCapacity,
                                   status);
    }

    /*
     * The keyword is "currency": to get the display name
     * we need to do the fallback ourselves.
     */
    int32_t nameLength = 0;
    UResourceBundle *bundle = ures_open(U_ICUDATA_CURR, displayLocale, status);
    UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
    UResourceBundle *currency = ures_getByKeyWithFallback(currencies, value, NULL, status);
    const UChar *name = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &nameLength, status);

    /* close the bundles */
    ures_close(currency);
    ures_close(currencies);
    ures_close(bundle);

    if (U_FAILURE(*status)) {
        if (*status != U_MISSING_RESOURCE_ERROR) {
            return 0;
        }
        /* we just want to write the value over if nothing is available */
        *status = U_USING_DEFAULT_WARNING;
    }

    if (name != NULL) {
        /* copy the display name over */
        if (nameLength > destCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return nameLength;
        }
        uprv_memcpy(dest, name, nameLength * U_SIZEOF_UCHAR);
        return u_terminateUChars(dest, destCapacity, nameLength, status);
    }

    /* we have not found the display name for the value .. just copy over */
    if (valueLength > destCapacity) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return valueLength;
    }
    u_charsToUChars(value, dest, valueLength);
    return u_terminateUChars(dest, destCapacity, valueLength, status);
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,37 @@
/*
******************************************************************************
*
* Copyright (C) 1996-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* File locmap.h : Locale Mapping Classes
*
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
* 3/11/97 aliu Added setId().
* 4/20/99 Madhu Added T_convertToPosix()
* 09/18/00 george Removed the memory leaks.
* 08/23/01 george Convert to C
*============================================================================
*/
#ifndef LOCMAP_H
#define LOCMAP_H
#include "unicode/utypes.h"
/* Keeps the low 10 bits of hostID, as a uint16_t. The argument and the whole
   expansion are parenthesized so expressions such as `a | b` expand correctly. */
#define LANGUAGE_LCID(hostID) ((uint16_t)(0x03FF & (hostID)))
/* Presumably maps a host (Windows LCID) locale identifier to a POSIX-style
   locale ID string - implementation not visible here; confirm in locmap.c. */
U_CAPI const char *uprv_convertToPosix(uint32_t hostid, UErrorCode* status);
/* Don't call this function directly. Use uloc_getLCID instead. */
U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status);
#endif /* LOCMAP_H */

Просмотреть файл

@ -0,0 +1,223 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: loclikely.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010feb25
* created by: Markus W. Scherer
*
* Code for miscellaneous locale-related resource bundle data access,
* separated out from other .cpp files
* that then do not depend on resource bundle code and this data.
*/
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "cstring.h"
#include "ulocimp.h"
#include "uresimp.h"
/*
* Lookup a resource bundle table item with fallback on the table level.
* Regular resource bundle lookups perform fallback to parent locale bundles
* and eventually the root bundle, but only for top-level items.
* This function takes the name of a top-level table and of an item in that table
* and performs a lookup of both, falling back until a bundle contains a table
* with this item.
*
* Note: Only the opening of entire bundles falls back through the default locale
* before root. Once a bundle is open, item lookups do not go through the
* default locale because that would result in a mix of languages that is
* unpredictable to the programmer and most likely useless.
*/
/*
 * @param path        resource bundle tree to open
 * @param locale      locale ID of the bundle to start from
 * @param tableKey    name of the top-level table
 * @param subTableKey optional sub-table inside tableKey, or NULL
 * @param itemKey     key of the string inside the (sub-)table
 * @param pLength     receives the length of the returned string
 * @param pErrorCode  receives warnings/errors from the lookup
 * @return the string found, or NULL when nothing could be found
 */
U_CAPI const UChar * U_EXPORT2
uloc_getTableStringWithFallback(const char *path, const char *locale,
                                const char *tableKey, const char *subTableKey,
                                const char *itemKey,
                                int32_t *pLength,
                                UErrorCode *pErrorCode)
{
    UResourceBundle *rb=NULL, table, subTable;
    const UChar *item=NULL;
    UErrorCode errorCode;
    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};

    /*
     * open the bundle for the current locale
     * this falls back through the locale's chain to root
     */
    errorCode=U_ZERO_ERROR;
    rb=ures_open(path, locale, &errorCode);

    if(U_FAILURE(errorCode)) {
        /* total failure, not even root could be opened */
        *pErrorCode=errorCode;
        return NULL;
    } else if(errorCode==U_USING_DEFAULT_WARNING ||
              (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
    ) {
        /* set the "strongest" error code (success->fallback->default->failure) */
        *pErrorCode=errorCode;
    }

    for(;;){
        /* NOTE(review): on a fallback iteration `table` is re-initialized here
           without a preceding ures_close(); confirm this does not leak. */
        ures_initStackObject(&table);
        ures_initStackObject(&subTable);
        ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);

        if (subTableKey != NULL) {
            /* descend in place: `table` now refers to the sub-table
               (subTable itself is never filled) */
            ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
        }
        if(U_SUCCESS(errorCode)){
            item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
            if(U_FAILURE(errorCode)){
                const char* replacement = NULL;
                *pErrorCode = errorCode; /*save the errorCode*/
                errorCode = U_ZERO_ERROR;
                /* may be a deprecated code */
                if(uprv_strcmp(tableKey, "Countries")==0){
                    replacement =  uloc_getCurrentCountryID(itemKey);
                }else if(uprv_strcmp(tableKey, "Languages")==0){
                    replacement =  uloc_getCurrentLanguageID(itemKey);
                }
                /* pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID
                   return the key itself if a replacement is not found */
                if(replacement!=NULL && itemKey != replacement){
                    item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
                    if(U_SUCCESS(errorCode)){
                        *pErrorCode = errorCode;
                        break;
                    }
                }
            }else{
                break;
            }
        }

        if(U_FAILURE(errorCode)){
            /* still can't figure out ?.. try the fallback mechanism */
            int32_t len = 0;
            const UChar* fallbackLocale =  NULL;
            *pErrorCode = errorCode;
            errorCode = U_ZERO_ERROR;

            fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
                break;
            }

            /*
             * The fallback name comes from resource data: refuse anything that
             * does not fit the fixed buffer (including its terminator) instead
             * of writing past the end of explicitFallbackName.
             */
            if(len < 0 || len >= (int32_t)sizeof(explicitFallbackName)){
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
            /* terminate explicitly: a previous, longer fallback name may still be in the buffer */
            explicitFallbackName[len] = 0;

            /* guard against recursive fallback */
            if(uprv_strcmp(explicitFallbackName, locale)==0){
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
            ures_close(rb);
            rb = ures_open(path, explicitFallbackName, &errorCode);
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
                break;
            }
            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
        }else{
            break;
        }
    }
    /* done with the locale string - ready to close table and rb */
    ures_close(&subTable);
    ures_close(&table);
    ures_close(rb);
    return item;
}
/*
 * Shared implementation of the orientation getters: canonicalizes localeId,
 * fetches the string stored under "layout"/<key> and maps its first code unit
 * ('b', 'l', 'r' or 't') to a ULayoutType.
 * Returns ULOC_LAYOUT_UNKNOWN when *status is (or becomes) a failure or when
 * the string is empty; an unrecognized first code unit sets
 * U_INTERNAL_PROGRAM_ERROR.
 */
static ULayoutType
_uloc_getOrientationHelper(const char* localeId,
                           const char* key,
                           UErrorCode *status)
{
    ULayoutType orientation = ULOC_LAYOUT_UNKNOWN;
    int32_t valueLength = 0;
    char canonicalId[ULOC_FULLNAME_CAPACITY];
    const UChar* layoutValue;
    UChar initial;

    if (U_FAILURE(*status)) {
        return orientation;
    }

    uloc_canonicalize(localeId, canonicalId, sizeof(canonicalId), status);
    if (U_FAILURE(*status)) {
        return orientation;
    }

    layoutValue = uloc_getTableStringWithFallback(NULL, canonicalId, "layout", NULL,
                                                  key, &valueLength, status);
    if (U_FAILURE(*status) || valueLength == 0) {
        return orientation;
    }

    /* only the first code unit of the value is significant */
    initial = layoutValue[0];
    if (initial == 0x0062) {            /* 'b' */
        orientation = ULOC_LAYOUT_BTT;
    } else if (initial == 0x006C) {     /* 'l' */
        orientation = ULOC_LAYOUT_LTR;
    } else if (initial == 0x0072) {     /* 'r' */
        orientation = ULOC_LAYOUT_RTL;
    } else if (initial == 0x0074) {     /* 't' */
        orientation = ULOC_LAYOUT_TTB;
    } else {
        *status = U_INTERNAL_PROGRAM_ERROR;
    }
    return orientation;
}
/*
 * Character-orientation lookup: delegates to _uloc_getOrientationHelper()
 * with the "characters" key and returns its result unchanged.
 */
U_CAPI ULayoutType U_EXPORT2
uloc_getCharacterOrientation(const char* localeId,
                             UErrorCode *status)
{
    const ULayoutType characterOrientation =
        _uloc_getOrientationHelper(localeId, "characters", status);
    return characterOrientation;
}
/**
 * Get the layout line orientation for the specified locale.
 * Delegates to _uloc_getOrientationHelper() with the "lines" key.
 *
 * @param localeId locale name
 * @param status Error status
 * @return an enum indicating the layout orientation for lines.
 */
U_CAPI ULayoutType U_EXPORT2
uloc_getLineOrientation(const char* localeId,
                        UErrorCode *status)
{
    const ULayoutType lineOrientation =
        _uloc_getOrientationHelper(localeId, "lines", status);
    return lineOrientation;
}

Просмотреть файл

@ -0,0 +1,265 @@
/*
*******************************************************************************
* Copyright (C) 2002-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
#include "unicode/resbund.h"
#include "cmemory.h"
#include "ustrfmt.h"
#include "locutil.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
#include "umutex.h"
// see LocaleUtility::getAvailableLocaleNames
// Lazily created there and deleted again by service_cleanup(); NULL until first use.
static icu::Hashtable * LocaleUtility_cache = NULL;
// Separator characters looked for when scanning locale ID strings,
// spelled as UChar code units: U+005F '_', U+0040 '@' (decimal 64), U+002E '.' (decimal 46).
#define UNDERSCORE_CHAR ((UChar)0x005f)
#define AT_SIGN_CHAR ((UChar)64)
#define PERIOD_CHAR ((UChar)46)
/*
******************************************************************
*/
/**
 * Cleanup callback: frees the static LocaleUtility_cache and resets the
 * pointer. Always reports success (TRUE).
 */
U_CDECL_BEGIN
static UBool U_CALLCONV service_cleanup(void) {
    // Deleting a NULL pointer is a no-op, so an unset cache needs no special case.
    delete LocaleUtility_cache;
    LocaleUtility_cache = NULL;
    return TRUE;
}
U_CDECL_END
U_NAMESPACE_BEGIN
/**
 * Copies *id into result and fixes the letter case of its leading part only:
 * everything before the first '_' is lowercased (ASCII A-Z), everything from
 * there up to the stop point is uppercased (ASCII a-z). The stop point is the
 * first '@' or '.', whichever comes first, or the end of the string.
 * Text at and after the stop point is left untouched.
 *
 * @param id     the locale ID to canonicalize; NULL makes result bogus
 * @param result receives the canonicalized string
 * @return result
 */
UnicodeString&
LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
{
    if (id == NULL) {
        result.setToBogus();
        return result;
    }

    // Fix case only (no other changes) up to the first '@' or '.' or
    // end of string, whichever comes first.  In 3.0 I changed this to
    // stop at first '@' or '.'.  It used to run out to the end of
    // string.  My fix makes the tests pass but is probably
    // structurally incorrect.  [alan 3.0]
    // A full level 2 canonicalization via uloc_canonicalize() was tried
    // instead and rejected because it caused some tests to fail.
    // TODO: Doug, you might want to revise this...
    result = *id;

    // Stop point: the earlier of '@' and '.'. A '.' must also win when there
    // is no '@' at all (indexOf() returned a negative value for it).
    int32_t end = result.indexOf(AT_SIGN_CHAR);
    int32_t n = result.indexOf(PERIOD_CHAR);
    if (n >= 0 && (end < 0 || n < end)) {
        end = n;
    }
    if (end < 0) {
        end = result.length();
    }

    // Boundary between the lowercase and the uppercase part. An underscore
    // beyond the stop point belongs to the untouched tail and must not
    // extend the lowercase run past it.
    n = result.indexOf(UNDERSCORE_CHAR);
    if (n < 0 || n > end) {
        n = end;
    }

    int32_t i = 0;
    for (; i < n; ++i) {
        UChar c = result.charAt(i);
        if (c >= 0x0041 && c <= 0x005a) {   // 'A'..'Z'
            c += 0x20;
            result.setCharAt(i, c);
        }
    }
    for (; i < end; ++i) {
        UChar c = result.charAt(i);
        if (c >= 0x0061 && c <= 0x007a) {   // 'a'..'z'
            c -= 0x20;
            result.setCharAt(i, c);
        }
    }
    return result;
}
/**
 * Builds a Locale from a UnicodeString locale ID.
 * A bogus id, or one of BUFLEN or more code units, makes result bogus.
 *
 * @param id     the locale ID
 * @param result receives the Locale
 * @return result
 */
Locale&
LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
{
    enum { BUFLEN = 128 };  // larger than ever needed

    if (id.isBogus() || id.length() >= BUFLEN) {
        result.setToBogus();
        return result;
    }

    /*
     * A Locale is created from a char * name, so the UnicodeString has to be
     * converted first.
     *
     * Problem: Locale ID strings may contain '@' which is a variant
     * character and cannot be handled by invariant-character conversion.
     *
     * Hack: Since ICU code can handle locale IDs with multiple encodings
     * of '@' (at least for EBCDIC; it's not known to be a problem for
     * ASCII-based systems),
     * we use regular invariant-character conversion for everything else
     * and manually convert U+0040 into a compiler-char-constant '@'.
     * While this compilation-time constant may not match the runtime
     * encoding of '@', it should be one of the encodings which ICU
     * recognizes.
     *
     * There should be only at most one '@' in a locale ID.
     */
    char buffer[BUFLEN];
    int32_t segmentStart = 0;
    int32_t atIndex;
    while ((atIndex = id.indexOf((UChar)0x40, segmentStart)) >= 0) {
        // normal invariant-character conversion for the text before this '@'
        id.extract(segmentStart, atIndex - segmentStart,
                   buffer + segmentStart, BUFLEN - segmentStart, US_INV);
        // manually "convert" U+0040 at id[atIndex] into '@' at buffer[atIndex]
        buffer[atIndex] = '@';
        segmentStart = atIndex + 1;
    }
    // no (further) '@': convert the rest of the string
    id.extract(segmentStart, INT32_MAX,
               buffer + segmentStart, BUFLEN - segmentStart, US_INV);

    result = Locale::createFromName(buffer);
    return result;
}
/**
 * Appends the name of locale (converted with invariant-character conversion)
 * to result; note that existing contents of result are kept, not replaced.
 * A bogus locale makes result bogus instead.
 *
 * @return result
 */
UnicodeString&
LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
{
    if (locale.isBogus()) {
        result.setToBogus();
        return result;
    }
    const UnicodeString localeName(locale.getName(), -1, US_INV);
    result.append(localeName);
    return result;
}
/**
 * Returns the set of locale names available for the bundle path bundleID,
 * as a Hashtable whose keys are the names (the values are meaningless).
 * Results are computed once per bundleID and kept in LocaleUtility_cache.
 *
 * @param bundleID bundle path passed to ures_openAvailableLocales();
 *                 an empty string selects the default (NULL) path
 * @return the cached table, owned by the cache; NULL on failure
 */
const Hashtable*
LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
{
    // LocaleUtility_cache is a hash-of-hashes.  The top-level keys
    // are path strings ('bundleID') passed to
    // ures_openAvailableLocales.  The top-level values are
    // second-level hashes.  The second-level keys are result strings
    // from ures_openAvailableLocales.  The second-level values are
    // garbage ((void*)1 or other random pointer).

    UErrorCode status = U_ZERO_ERROR;
    Hashtable* cache;
    umtx_lock(NULL);
    cache = LocaleUtility_cache;
    umtx_unlock(NULL);

    if (cache == NULL) {
        cache = new Hashtable(status);
        if (cache == NULL || U_FAILURE(status)) {
            // The object may exist even though its construction failed.
            delete cache;
            return NULL; // catastrophic failure; e.g. out of memory
        }
        cache->setValueDeleter(uhash_deleteHashtable);
        Hashtable* h; // set this to final LocaleUtility_cache value
        umtx_lock(NULL);
        h = LocaleUtility_cache;
        if (h == NULL) {
            LocaleUtility_cache = h = cache;
            cache = NULL;
            ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
        }
        umtx_unlock(NULL);
        // Non-NULL only if another thread installed its cache first.
        delete cache;
        cache = h;
    }

    U_ASSERT(cache != NULL);

    Hashtable* htp;
    umtx_lock(NULL);
    htp = (Hashtable*) cache->get(bundleID);
    umtx_unlock(NULL);

    if (htp == NULL) {
        htp = new Hashtable(status);
        if (htp == NULL || U_FAILURE(status)) {
            // Never hand out a table that is unpopulated and unregistered.
            delete htp;
            return NULL;
        }

        CharString cbundleID;
        cbundleID.appendInvariantChars(bundleID, status);
        const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
        UEnumeration *uenum = ures_openAvailableLocales(path, &status);
        for (;;) {
            const UChar* id = uenum_unext(uenum, NULL, &status);
            if (id == NULL) {
                break;
            }
            htp->put(UnicodeString(id), (void*)htp, status);
        }
        uenum_close(uenum);
        if (U_FAILURE(status)) {
            delete htp;
            return NULL;
        }

        umtx_lock(NULL);
        Hashtable* existing = (Hashtable*) cache->get(bundleID);
        if (existing != NULL) {
            // Another thread populated this entry while we were building ours.
            // Replacing it would let the cache's value deleter free a table
            // that has already been returned to a caller, so keep theirs and
            // discard ours.
            umtx_unlock(NULL);
            delete htp;
            htp = existing;
        } else {
            cache->put(bundleID, (void*)htp, status);
            umtx_unlock(NULL);
            if (U_FAILURE(status)) {
                // NOTE(review): the cache has a value deleter and is understood
                // to dispose of an adopted value when put() fails - confirm
                // against uhash. Either way the table is not registered, so do
                // not return it.
                htp = NULL;
            }
        }
    }
    return htp;
}
/**
 * Returns whether root is child itself or a prefix of child that ends right
 * before an underscore, i.e. child starts with root and is either exactly
 * as long as root or continues with '_'.
 */
UBool
LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
{
    if (child.indexOf(root) != 0) {
        return FALSE;   // child does not start with root
    }
    const int32_t rootLength = root.length();
    if (child.length() == rootLength) {
        return TRUE;    // identical strings
    }
    return child.charAt(rootLength) == UNDERSCORE_CHAR;
}
U_NAMESPACE_END
/* !UCONFIG_NO_SERVICE */
#endif

Просмотреть файл

@ -0,0 +1,37 @@
/**
*******************************************************************************
* Copyright (C) 2002-2005, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
#ifndef LOCUTIL_H
#define LOCUTIL_H
#include "unicode/utypes.h"
#include "hash.h"
#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION
U_NAMESPACE_BEGIN
// temporary utility functions, till I know where to find them
// in header so tests can also access them
// All members are static; the class only serves as a scope for them.
class U_COMMON_API LocaleUtility {
public:
// Copies *id into result, fixing the letter case of the part in front of
// the first '@' or '.'; a NULL id makes result bogus. Returns result.
static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result);
// Sets result to the Locale named by id, or to bogus if id is bogus or
// too long for the internal conversion buffer. Returns result.
static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
// Appends the name of locale to result, or makes result bogus if locale
// is bogus. Returns result.
static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
// Returns a cached table whose keys are the locale names available for the
// bundle path bundleID, or NULL on failure. The table stays owned by the cache.
static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
// Returns whether child starts with root and is either equal to it or
// continues with an underscore.
static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
};
U_NAMESPACE_END
#endif
#endif

Просмотреть файл

@ -0,0 +1,63 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: messageimpl.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011apr04
* created by: Markus W. Scherer
*/
#ifndef __MESSAGEIMPL_H__
#define __MESSAGEIMPL_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messagepattern.h"
U_NAMESPACE_BEGIN
/**
 * Static helpers for code that works with a MessagePattern.
 * The Java implementation keeps these as package-private methods of
 * MessagePattern itself; in C++ they are declared in this class and
 * implemented in messagepattern.cpp.
 */
class U_COMMON_API MessageImpl {
public:
    /**
     * Tells whether the pattern uses JDK-style apostrophe handling.
     * @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
     */
    static UBool jdkAposMode(const MessagePattern &msgPattern) {
        return UMSGPAT_APOS_DOUBLE_REQUIRED==msgPattern.getApostropheMode();
    }

    /**
     * Appends the s[start, limit[ substring to sb, but with only half of the
     * apostrophes, according to JDK pattern behavior.
     */
    static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
                                         UnicodeString &sb);

    /**
     * Appends the sub-message to the result string.
     * Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
     * @return result
     */
    static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
                                                            int32_t msgStart,
                                                            UnicodeString &result);

private:
    MessageImpl();  // declared but not meant to be used: all methods are static
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // __MESSAGEIMPL_H__

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,23 @@
//{{NO_DEPENDENCIES}}
// Copyright (c) 2003-2010 International Business Machines
// Corporation and others. All Rights Reserved.
//
// Used by common.rc and other .rc files.
//Do not edit with Microsoft Developer Studio because it will modify this
//header the wrong way. This is here to prevent Visual Studio .NET from
//unnecessarily building the resource files when it's not needed.
//
/*
These are defined before unicode/uversion.h in order to prevent
STLPort's broken stddef.h from being used when rc.exe parses this file.
*/
#define _STLP_OUTERMOST_HEADER_ID 0
#define _STLP_WINCE 1
#include "unicode/uversion.h"
// Branding strings made available to common.rc and the other .rc files
// that include this header.
#define ICU_WEBSITE "http://icu-project.org"
#define ICU_COMPANY "The ICU Project"
#define ICU_PRODUCT_PREFIX "ICU"
#define ICU_PRODUCT "International Components for Unicode"

Просмотреть файл

@ -0,0 +1,140 @@
/*
*******************************************************************************
*
* Copyright (C) 2008-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: mutex.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*/
#include "unicode/utypes.h"
#include "mutex.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
// Returns the singleton instance, creating it with instantiator(context, errorCode)
// if fInstance is still NULL.
// - duplicate is always reset to NULL first; it receives the newly created
//   object when that object was not stored in fInstance (another thread stored
//   one first, or errorCode indicates failure). The caller deletes it.
// - Returns NULL without doing anything else if errorCode is already a failure.
// - A failed creation is not remembered; a later call tries again.
void *SimpleSingleton::getInstance(InstantiatorFn *instantiator, const void *context,
void *&duplicate,
UErrorCode &errorCode) {
duplicate=NULL;
if(U_FAILURE(errorCode)) {
return NULL;
}
// Fast path: read fInstance without taking the lock.
// TODO: With atomicops.h: void *instance = (void*)Acquire_Load(&fInstance);
// and remove UMTX_ACQUIRE_BARRIER below.
void *instance=ANNOTATE_UNPROTECTED_READ(fInstance);
UMTX_ACQUIRE_BARRIER;
ANNOTATE_HAPPENS_AFTER(&fInstance);
if(instance!=NULL) {
return instance;
}
// Attempt to create the instance.
// If a race occurs, then the losing thread will assign its new instance
// to the "duplicate" parameter, and the caller deletes it.
// The instantiator runs before the lock is taken.
instance=instantiator(context, errorCode);
UMTX_RELEASE_BARRIER; // Release-barrier before fInstance=instance;
// Default-constructed Mutex: locks the global mutex until this function returns.
Mutex mutex;
// Re-check under the lock: store only if nobody else has stored an instance yet.
if(fInstance==NULL && U_SUCCESS(errorCode)) {
U_ASSERT(instance!=NULL);
ANNOTATE_HAPPENS_BEFORE(&fInstance);
// TODO: With atomicops.h: Release_Store(&fInstance, (AtomicWord)instance);
// and remove UMTX_RELEASE_BARRIER above.
fInstance=instance;
} else {
// Lost the race or creation failed: hand our object back for deletion.
duplicate=instance;
}
return fInstance;
}
/*
* Three states:
*
* Initial state: Instance creation not attempted yet.
* fInstance=NULL && U_SUCCESS(fErrorCode)
*
* Instance creation succeeded:
* fInstance!=NULL && U_SUCCESS(fErrorCode)
*
* Instance creation failed:
* fInstance=NULL && U_FAILURE(fErrorCode)
* We will not attempt again to create the instance.
*
* fInstance changes at most once.
* fErrorCode changes at most twice (initial->failed->succeeded).
*
* getInstance() returns the instance, creating it with
* instantiator(context, errorCode) while in the initial state.
* - duplicate is always reset to NULL first; it receives the newly created
*   object when that object was not stored in fInstance. The caller deletes it.
* - Returns NULL without doing anything else if errorCode is already a failure.
* - If a previous creation failed, errorCode is set to the remembered
*   fErrorCode and NULL is returned.
*/
void *TriStateSingleton::getInstance(InstantiatorFn *instantiator, const void *context,
void *&duplicate,
UErrorCode &errorCode) {
duplicate=NULL;
if(U_FAILURE(errorCode)) {
return NULL;
}
// Fast path: read fInstance without taking the lock.
// TODO: With atomicops.h: void *instance = (void*)Acquire_Load(&fInstance);
// and remove UMTX_ACQUIRE_BARRIER below.
void *instance=ANNOTATE_UNPROTECTED_READ(fInstance);
UMTX_ACQUIRE_BARRIER;
ANNOTATE_HAPPENS_AFTER(&fInstance);
if(instance!=NULL) {
// instance was created
return instance;
}
// The read access to fErrorCode is thread-unsafe, but harmless because
// at worst multiple threads race to each create a new instance,
// and all losing threads delete their duplicates.
UErrorCode localErrorCode=ANNOTATE_UNPROTECTED_READ(fErrorCode);
if(U_FAILURE(localErrorCode)) {
// instance creation failed
errorCode=localErrorCode;
return NULL;
}
// First attempt to create the instance.
// If a race occurs, then the losing thread will assign its new instance
// to the "duplicate" parameter, and the caller deletes it.
// The instantiator runs before the lock is taken.
instance=instantiator(context, errorCode);
UMTX_RELEASE_BARRIER; // Release-barrier before fInstance=instance;
// Default-constructed Mutex: locks the global mutex until this function returns.
Mutex mutex;
if(fInstance==NULL && U_SUCCESS(errorCode)) {
// instance creation newly succeeded
U_ASSERT(instance!=NULL);
ANNOTATE_HAPPENS_BEFORE(&fInstance);
// TODO: With atomicops.h: Release_Store(&fInstance, (AtomicWord)instance);
// and remove UMTX_RELEASE_BARRIER above.
fInstance=instance;
// Set fErrorCode on the off-chance that a previous instance creation failed.
fErrorCode=errorCode;
// Completed state transition: initial->succeeded, or failed->succeeded.
} else {
// Record a duplicate if we lost the race, or
// if we got an instance but its creation failed anyway.
duplicate=instance;
// Remember only the first failure; a later one does not overwrite it.
if(fInstance==NULL && U_SUCCESS(fErrorCode) && U_FAILURE(errorCode)) {
// instance creation newly failed
fErrorCode=errorCode;
// Completed state transition: initial->failed.
}
}
return fInstance;
}
// Puts the singleton back into its initial state (no instance, no error).
// Does not delete the instance and takes no lock.
void TriStateSingleton::reset() {
fInstance=NULL;
fErrorCode=U_ZERO_ERROR;
}
#if UCONFIG_NO_SERVICE
/* If UCONFIG_NO_SERVICE, then there is no invocation of Mutex elsewhere in
common, so add one here to force an export */
/* The pointer itself is never used; it only mentions the Mutex type. */
static Mutex *aMutex = 0;
/* UCONFIG_NO_SERVICE */
#endif
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,198 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*/
//----------------------------------------------------------------------------
// File: mutex.h
//
// Lightweight C++ wrapper for umtx_ C mutex functions
//
// Author: Alan Liu 1/31/97
// History:
// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop.
// 04/07/1999 srl refocused as a thin wrapper
//
//----------------------------------------------------------------------------
#ifndef MUTEX_H
#define MUTEX_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------
// Code within that accesses shared static or global data
// should instantiate a Mutex object while doing so. You should make your own
// private mutex where possible.
// For example:
//
// UMutex myMutex;
//
// void Function(int arg1, int arg2)
// {
// static Object* foo; // Shared read-write object
// Mutex mutex(&myMutex); // or no args for the global lock
// foo->Method();
// // When 'mutex' goes out of scope and gets destroyed here, the lock is released
// }
//
// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function
// returning a Mutex. This is a common mistake which silently slips through the
// compiler!!
//
class U_COMMON_API Mutex : public UMemory {
public:
// Locks the given mutex; NULL (the default) selects the global lock.
inline Mutex(UMutex *mutex = NULL);
// Unlocks the mutex that the constructor locked.
inline ~Mutex();
private:
UMutex *fMutex; // the mutex held for the lifetime of this object
Mutex(const Mutex &other); // forbid copying of this class
Mutex &operator=(const Mutex &other); // forbid copying of this class
};
// Remembers the mutex and locks it right away.
inline Mutex::Mutex(UMutex *mutex) : fMutex(mutex) {
    umtx_lock(mutex);
}

// Releases the lock taken by the constructor.
inline Mutex::~Mutex() {
    umtx_unlock(fMutex);
}
// common code for singletons ---------------------------------------------- ***
/**
* Function pointer for the instantiator parameter of
* SimpleSingleton::getInstance() and TriStateSingleton::getInstance().
* The function creates some object, optionally using the context parameter.
* The function need not check for U_FAILURE(errorCode).
* The returned pointer is what getInstance() stores as the singleton instance
* or hands back to its caller as the duplicate.
*/
typedef void *InstantiatorFn(const void *context, UErrorCode &errorCode);
/**
* Singleton struct with shared instantiation/mutexing code.
* Simple: Does not remember if a previous instantiation failed.
* Best used if the instantiation can really only fail with an out-of-memory error,
* otherwise use a TriStateSingleton.
* Best used via SimpleSingletonWrapper or similar.
* Define a static SimpleSingleton instance via the STATIC_SIMPLE_SINGLETON macro.
*/
struct SimpleSingleton {
// The singleton instance, or NULL while none has been stored.
void *fInstance;
/**
* Returns the singleton instance, or NULL if it could not be created.
* Calls the instantiator with the context if the instance has not been
* created yet. In a race condition, the duplicate may not be NULL.
* The caller must delete the duplicate.
* The caller need not initialize the duplicate before the call.
*/
void *getInstance(InstantiatorFn *instantiator, const void *context,
void *&duplicate,
UErrorCode &errorCode);
/**
* Resets the fields. The caller must have deleted the singleton instance.
* Not mutexed.
* Call this from a cleanup function.
*/
void reset() { fInstance=NULL; }
};
// Defines a file-static SimpleSingleton called 'name' with fInstance set to NULL.
#define STATIC_SIMPLE_SINGLETON(name) static SimpleSingleton name={ NULL }
/**
 * Typed convenience wrapper around a SimpleSingleton.
 * Meant to be created temporarily on the stack; it casts the untyped
 * instance pointer to T and deletes the duplicate that a lost race produces.
 */
template<typename T>
class SimpleSingletonWrapper {
public:
    SimpleSingletonWrapper(SimpleSingleton &s) : singleton(s) {}

    /** Deletes the current instance (as a T) and resets the singleton. */
    void deleteInstance() {
        delete static_cast<T *>(singleton.fInstance);
        singleton.reset();
    }

    /**
     * Returns the singleton instance as a T, creating it if necessary;
     * any duplicate handed back by the singleton is deleted here.
     */
    T *getInstance(InstantiatorFn *instantiator, const void *context,
                   UErrorCode &errorCode) {
        void *surplus;
        void *current=singleton.getInstance(instantiator, context, surplus, errorCode);
        delete static_cast<T *>(surplus);
        return static_cast<T *>(current);
    }

private:
    SimpleSingleton &singleton;
};
/**
* Singleton struct with shared instantiation/mutexing code.
* Tri-state: Instantiation succeeded/failed/not attempted yet.
* Best used via TriStateSingletonWrapper or similar.
* Define a static TriStateSingleton instance via the STATIC_TRI_STATE_SINGLETON macro.
*/
struct TriStateSingleton {
// The singleton instance, or NULL while none has been stored.
void *fInstance;
// Failure code of a failed creation attempt; a success code otherwise.
UErrorCode fErrorCode;
/**
* Returns the singleton instance, or NULL if it could not be created.
* Calls the instantiator with the context if the instance has not been
* created yet. In a race condition, the duplicate may not be NULL.
* The caller must delete the duplicate.
* The caller need not initialize the duplicate before the call.
* The singleton creation is only attempted once. If it fails,
* the singleton will then always return NULL.
*/
void *getInstance(InstantiatorFn *instantiator, const void *context,
void *&duplicate,
UErrorCode &errorCode);
/**
* Resets the fields. The caller must have deleted the singleton instance.
* Not mutexed.
* Call this from a cleanup function.
*/
void reset();
};
// Defines a file-static TriStateSingleton called 'name' in its initial state
// (fInstance NULL, fErrorCode U_ZERO_ERROR).
#define STATIC_TRI_STATE_SINGLETON(name) static TriStateSingleton name={ NULL, U_ZERO_ERROR }
/**
 * Typed convenience wrapper around a TriStateSingleton.
 * Meant to be created temporarily on the stack; it casts the untyped
 * instance pointer to T and deletes the duplicate that a lost race produces.
 */
template<typename T>
class TriStateSingletonWrapper {
public:
    TriStateSingletonWrapper(TriStateSingleton &s) : singleton(s) {}

    /** Deletes the current instance (as a T) and resets the singleton. */
    void deleteInstance() {
        delete static_cast<T *>(singleton.fInstance);
        singleton.reset();
    }

    /**
     * Returns the singleton instance as a T, creating it if necessary;
     * any duplicate handed back by the singleton is deleted here.
     */
    T *getInstance(InstantiatorFn *instantiator, const void *context,
                   UErrorCode &errorCode) {
        void *surplus;
        void *current=singleton.getInstance(instantiator, context, surplus, errorCode);
        delete static_cast<T *>(surplus);
        return static_cast<T *>(current);
    }

private:
    TriStateSingleton &singleton;
};
U_NAMESPACE_END
#endif // MUTEX_H
//eof

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,777 @@
/*
*******************************************************************************
*
* Copyright (C) 2009-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: normalizer2impl.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2009nov22
* created by: Markus W. Scherer
*/
#ifndef __NORMALIZER2IMPL_H__
#define __NORMALIZER2IMPL_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/normalizer2.h"
#include "unicode/udata.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "unicode/utf16.h"
#include "mutex.h"
#include "uset_imp.h"
#include "utrie2.h"
U_NAMESPACE_BEGIN
struct CanonIterData;
class Hangul {
public:
/* Korean Hangul and Jamo constants */
enum {
JAMO_L_BASE=0x1100, /* "lead" jamo */
JAMO_V_BASE=0x1161, /* "vowel" jamo */
JAMO_T_BASE=0x11a7, /* "trail" jamo */
HANGUL_BASE=0xac00,
JAMO_L_COUNT=19,
JAMO_V_COUNT=21,
JAMO_T_COUNT=28,
JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,
HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
};
static inline UBool isHangul(UChar32 c) {
return HANGUL_BASE<=c && c<HANGUL_LIMIT;
}
static inline UBool
isHangulWithoutJamoT(UChar c) {
c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
static inline UBool isJamoL(UChar32 c) {
return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
}
static inline UBool isJamoV(UChar32 c) {
return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
}
/**
* Decomposes c, which must be a Hangul syllable, into buffer
* and returns the length of the decomposition (2 or 3).
*/
static inline int32_t decompose(UChar32 c, UChar buffer[3]) {
c-=HANGUL_BASE;
UChar32 c2=c%JAMO_T_COUNT;
c/=JAMO_T_COUNT;
buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
if(c2==0) {
return 2;
} else {
buffer[2]=(UChar)(JAMO_T_BASE+c2);
return 3;
}
}
/**
* Decomposes c, which must be a Hangul syllable, into buffer.
* This is the raw, not recursive, decomposition. Its length is always 2.
*/
static inline void getRawDecomposition(UChar32 c, UChar buffer[2]) {
UChar32 orig=c;
c-=HANGUL_BASE;
UChar32 c2=c%JAMO_T_COUNT;
if(c2==0) {
c/=JAMO_T_COUNT;
buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
} else {
buffer[0]=orig-c2; // LV syllable
buffer[1]=(UChar)(JAMO_T_BASE+c2);
}
}
private:
Hangul(); // no instantiation
};
class Normalizer2Impl;
// Helper that appends characters, each with its combining class (cc), to the
// buffer of a destination UnicodeString. It tracks the write position (limit),
// the remaining capacity and the cc of the last character; a character whose
// cc is lower than that of the last one is handed to insert() rather than
// being written at the end.
class ReorderingBuffer : public UMemory {
public:
// Only records the references and clears the state; init() must follow.
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
impl(ni), str(dest),
start(NULL), reorderStart(NULL), limit(NULL),
remainingCapacity(0), lastCC(0) {}
// If a buffer is held (start!=NULL), hands it back to str with the
// number of units written so far as the new length.
~ReorderingBuffer() {
if(start!=NULL) {
str.releaseBuffer((int32_t)(limit-start));
}
}
UBool init(int32_t destCapacity, UErrorCode &errorCode);
UBool isEmpty() const { return start==limit; }
int32_t length() const { return (int32_t)(limit-start); }
UChar *getStart() { return start; }
UChar *getLimit() { return limit; }
uint8_t getLastCC() const { return lastCC; }
UBool equals(const UChar *start, const UChar *limit) const;
// For Hangul composition, replacing the Leading consonant Jamo with the syllable.
// Overwrites the code unit just before limit.
void setLastChar(UChar c) {
*(limit-1)=c;
}
// Appends one code point, dispatching on whether it fits in one code unit.
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ?
appendBMP((UChar)c, cc, errorCode) :
appendSupplementary(c, cc, errorCode);
}
// s must be in NFD, otherwise change the implementation.
UBool append(const UChar *s, int32_t length,
uint8_t leadCC, uint8_t trailCC,
UErrorCode &errorCode);
// Appends one BMP code unit. Returns FALSE if there is no room left and
// resize() fails.
UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
if(remainingCapacity==0 && !resize(1, errorCode)) {
return FALSE;
}
if(lastCC<=cc || cc==0) {
// Already in order (or cc==0): write at the end.
*limit++=c;
lastCC=cc;
if(cc<=1) {
// Nothing at or before this character will be reordered any more.
reorderStart=limit;
}
} else {
// cc is lower than the last one: let insert() place the character.
insert(c, cc);
}
--remainingCapacity;
return TRUE;
}
UBool appendZeroCC(UChar32 c, UErrorCode &errorCode);
UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode);
void remove();
void removeSuffix(int32_t suffixLength);
// Moves limit to newLimit, credits the difference (limit-newLimit) to
// remainingCapacity, and restarts reordering there with lastCC=0.
void setReorderingLimit(UChar *newLimit) {
remainingCapacity+=(int32_t)(limit-newLimit);
reorderStart=limit=newLimit;
lastCC=0;
}
// Sets s to the text between reorderStart and limit.
void copyReorderableSuffixTo(UnicodeString &s) const {
s.setTo(reorderStart, (int32_t)(limit-reorderStart));
}
private:
/*
* TODO: Revisit whether it makes sense to track reorderStart.
* It is set to after the last known character with cc<=1,
* which stops previousCC() before it reads that character and looks up its cc.
* previousCC() is normally only called from insert().
* In other words, reorderStart speeds up the insertion of a combining mark
* into a multi-combining mark sequence where it does not belong at the end.
* This might not be worth the trouble.
* On the other hand, it's not a huge amount of trouble.
*
* We probably need it for UNORM_SIMPLE_APPEND.
*/
UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode);
void insert(UChar32 c, uint8_t cc);
// Writes c at p as one code unit (c<=0xffff) or as a lead/trail surrogate pair.
static void writeCodePoint(UChar *p, UChar32 c) {
if(c<=0xffff) {
*p=(UChar)c;
} else {
p[0]=U16_LEAD(c);
p[1]=U16_TRAIL(c);
}
}
UBool resize(int32_t appendLength, UErrorCode &errorCode);
const Normalizer2Impl &impl;
UnicodeString &str; // destination string whose buffer is being written
UChar *start, *reorderStart, *limit;
int32_t remainingCapacity;
uint8_t lastCC; // cc passed for the most recently appended character
// private backward iterator
void setIterator() { codePointStart=limit; }
void skipPrevious(); // Requires start<codePointStart.
uint8_t previousCC(); // Returns 0 if there is no previous character.
// Iterator position; not set by the constructor, only by setIterator().
UChar *codePointStart, *codePointLimit;
};
class U_COMMON_API Normalizer2Impl : public UMemory {
public:
Normalizer2Impl() : memory(NULL), normTrie(NULL) {
canonIterDataSingleton.fInstance=NULL;
}
~Normalizer2Impl();
void load(const char *packageName, const char *name, UErrorCode &errorCode);
void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
// low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
UBool ensureCanonIterData(UErrorCode &errorCode) const;
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
return UNORM_YES;
} else if(minMaybeYes<=norm16) {
return UNORM_MAYBE;
} else {
return UNORM_NO;
}
}
UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
uint8_t getCC(uint16_t norm16) const {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
return (uint8_t)norm16;
}
if(norm16<minNoNo || limitNoNo<=norm16) {
return 0;
}
return getCCFromNoNo(norm16);
}
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0;
}
/**
* Returns the FCD data for code point c.
* @param c A Unicode code point.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t getFCD16(UChar32 c) const {
if(c<0) {
return 0;
} else if(c<0x180) {
return tccc180[c];
} else if(c<=0xffff) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
}
return getFCD16FromNormData(c);
}
/**
* Returns the FCD data for the next code point (post-increment).
* Might skip only a lead surrogate rather than the whole surrogate pair if none of
* the supplementary code points associated with the lead surrogate have non-zero FCD data.
* @param s A valid pointer into a string. Requires s!=limit.
* @param limit The end of the string, or NULL.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
UChar32 c=*s++;
if(c<0x180) {
return tccc180[c];
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
}
UChar c2;
if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {
c=U16_GET_SUPPLEMENTARY(c, c2);
++s;
}
return getFCD16FromNormData(c);
}
/**
* Returns the FCD data for the previous code point (pre-decrement).
* @param start The start of the string.
* @param s A valid pointer into a string. Requires start<s.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
UChar32 c=*--s;
if(c<0x180) {
return tccc180[c];
}
if(!U16_IS_TRAIL(c)) {
if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
}
} else {
UChar c2;
if(start<s && U16_IS_LEAD(c2=*(s-1))) {
c=U16_GET_SUPPLEMENTARY(c2, c);
--s;
}
}
return getFCD16FromNormData(c);
}
/** Returns the FCD data for U+0000<=c<U+0180. */
uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
/**
 * Tests the smallFCD bit set for a single-or-lead code unit.
 * smallFCD holds one byte per 256 code units (index lead>>8) and, within it,
 * one bit per 32 code units (bit (lead>>5)&7).
 * Requires 0<=lead<=0xffff.
 * @return non-zero if the code unit might have non-zero FCD data.
 */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
    const uint8_t bits=smallFCD[lead>>8];
    if(bits!=0) {
        const int32_t bitIndex=(lead>>5)&7;
        return (UBool)((bits>>bitIndex)&1);
    }
    // Whole 256-code-unit block has no FCD data.
    return false;
}
/**
 * Returns the FCD value from the regular normalization data.
 * Declared only; getFCD16(), nextFCD16() and previousFCD16() fall through to this
 * function after their inline shortcuts (tccc180[] and the smallFCD bit set).
 */
uint16_t getFCD16FromNormData(UChar32 c) const;
// NOTE(review): going by the name and parameters, this adds canonical-iterator data
// for the range start..end with the given norm16 to newData; confirm against the definition.
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
/**
* Gets the decomposition for one code point.
* @param c code point
* @param buffer out-only buffer for algorithmic decompositions (4 code units)
* @param length out-only, takes the length of the decomposition, if any
* @return pointer to the decomposition, or NULL if none
*/
const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const;
/**
* Gets the raw decomposition for one code point.
* @param c code point
* @param buffer out-only buffer for algorithmic decompositions (30 code units)
* @param length out-only, takes the length of the decomposition, if any
* @return pointer to the decomposition, or NULL if none
*/
const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const;
// NOTE(review): the three declarations below are defined out of line; the semantics
// suggested by their names (pairwise composition, canonical-segment queries) should be
// confirmed against the implementation file.
UChar32 composePair(UChar32 a, UChar32 b) const;
UBool isCanonSegmentStarter(UChar32 c) const;
UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
// Code point threshold used by hasFCDBoundaryBefore(): every code point below this value
// is reported as having an FCD boundary before it without looking at the data.
enum {
MIN_CCC_LCCC_CP=0x300
};
// Fixed norm16 values and thresholds. The data-dependent thresholds are held in the
// minYesNo/minNoNo/limitNoNo/minMaybeYes member fields instead.
enum {
MIN_YES_YES_WITH_CC=0xff01,
JAMO_VT=0xff00, // value tested by isJamoVT(); also the upper bound in isMaybe()
MIN_NORMAL_MAYBE_YES=0xfe00, // at or above this, getCCFromYesOrMaybe() returns the low byte of norm16
JAMO_L=1, // the same value that isJamoL() compares against
MAX_DELTA=0x40 // part of the offset computed in mapAlgorithmic()
};
// Positions in the int32_t indexes[] array at the start of the .nrm data
// (see the file format description near the end of this header).
enum {
// Byte offsets from the start of the data, after the generic header.
IX_NORM_TRIE_OFFSET,
IX_EXTRA_DATA_OFFSET,
IX_SMALL_FCD_OFFSET,
IX_RESERVED3_OFFSET,
IX_RESERVED4_OFFSET,
IX_RESERVED5_OFFSET,
IX_RESERVED6_OFFSET,
IX_TOTAL_SIZE,
// Code point thresholds for quick check codes.
IX_MIN_DECOMP_NO_CP,
IX_MIN_COMP_NO_MAYBE_CP,
// Norm16 value thresholds for quick check combinations and types of extra data.
IX_MIN_YES_NO, // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
IX_MIN_NO_NO,
IX_LIMIT_NO_NO,
IX_MIN_MAYBE_YES,
IX_MIN_YES_NO_MAPPINGS_ONLY, // Mappings only in [minYesNoMappingsOnly..minNoNo[.
IX_RESERVED15,
IX_COUNT
};
// Flags and length mask for the first 16-bit unit of a mapping in extraData:
// getCCFromNoNo() tests MAPPING_HAS_CCC_LCCC_WORD on it, and
// getCompositionsListForComposite() extracts the length with MAPPING_LENGTH_MASK.
enum {
MAPPING_HAS_CCC_LCCC_WORD=0x80,
MAPPING_HAS_RAW_MAPPING=0x40,
MAPPING_NO_COMP_BOUNDARY_AFTER=0x20,
MAPPING_LENGTH_MASK=0x1f
};
// NOTE(review): bit-layout constants for the entries of a compositions list; their
// consumers (presumably combine()/recompose()) are not visible here -- confirm.
enum {
COMP_1_LAST_TUPLE=0x8000,
COMP_1_TRIPLE=1,
COMP_1_TRAIL_LIMIT=0x3400,
COMP_1_TRAIL_MASK=0x7ffe,
COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit
COMP_2_TRAIL_SHIFT=6,
COMP_2_TRAIL_MASK=0xffc0
};
// higher-level functionality ------------------------------------------ ***
// All functions in this group are declared only; they operate on UTF-16 text in
// [src, limit[ and report problems through errorCode where they take one.
// NOTE(review): this overload takes the ReorderingBuffer by pointer, unlike the
// *AndAppend functions; whether NULL is accepted must be confirmed in the definition.
const UChar *decompose(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void decomposeAndAppend(const UChar *src, const UChar *limit,
UBool doDecompose,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
UBool compose(const UChar *src, const UChar *limit,
UBool onlyContiguous,
UBool doCompose,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
// Quick-check variant: has no buffer and no error code; the result is delivered via pQCResult.
const UChar *composeQuickCheck(const UChar *src, const UChar *limit,
UBool onlyContiguous,
UNormalizationCheckResult *pQCResult) const;
void composeAndAppend(const UChar *src, const UChar *limit,
UBool doCompose,
UBool onlyContiguous,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
// NOTE(review): buffer is a pointer here as in decompose(); confirm NULL handling in the definition.
const UChar *makeFCD(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void makeFCDAndAppend(const UChar *src, const UChar *limit,
UBool doMakeFCD,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
UBool hasDecompBoundary(UChar32 c, UBool before) const;
/** True if the norm16 value of c passes isDecompYesAndZeroCC(). */
UBool isDecompInert(UChar32 c) const {
    const uint16_t norm16=getNorm16(c);
    return isDecompYesAndZeroCC(norm16);
}
/**
 * Code points below minCompNoMaybeCP always have a composition boundary before them;
 * for the rest the decision is delegated to the (c, norm16) overload.
 */
UBool hasCompBoundaryBefore(UChar32 c) const {
    const UBool belowThreshold= c<minCompNoMaybeCP;
    return belowThreshold || hasCompBoundaryBefore(c, getNorm16(c));
}
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
/**
 * There is an FCD boundary before c if c is below MIN_CCC_LCCC_CP
 * or if the upper byte (lccc) of its FCD value is zero.
 */
UBool hasFCDBoundaryBefore(UChar32 c) const {
    const UBool belowFirstCCC= c<MIN_CCC_LCCC_CP;
    return belowFirstCCC || getFCD16(c)<=0xff;
}
/**
 * There is an FCD boundary after c if its FCD value is 0 or 1,
 * or if the lower byte (tccc) of the FCD value is zero.
 */
UBool hasFCDBoundaryAfter(UChar32 c) const {
    const uint16_t fcd16=getFCD16(c);
    const UBool trailCCIsZero= (fcd16&0xff)==0;
    return fcd16<=1 || trailCCIsZero;
}
/** True if the FCD value of c is 0 or 1. */
UBool isFCDInert(UChar32 c) const {
    const uint16_t fcd16=getFCD16(c);
    return fcd16<=1;
}
private:
static UBool U_CALLCONV
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
// Range and equality tests on 16-bit trie values (norm16).
/** minMaybeYes<=norm16<=JAMO_VT */
UBool isMaybe(uint16_t norm16) const {
    return norm16>=minMaybeYes && norm16<=JAMO_VT;
}
/** norm16 is at or above minMaybeYes. */
UBool isMaybeOrNonZeroCC(uint16_t norm16) const {
    return minMaybeYes<=norm16;
}
/** norm16 is exactly 0. */
static UBool isInert(uint16_t norm16) {
    return 0==norm16;
}
/** norm16 is exactly JAMO_L (1). */
static UBool isJamoL(uint16_t norm16) {
    return norm16==JAMO_L;
}
/** norm16 is exactly JAMO_VT. */
static UBool isJamoVT(uint16_t norm16) {
    return JAMO_VT==norm16;
}
/** norm16 is exactly minYesNo. */
UBool isHangul(uint16_t norm16) const {
    return minYesNo==norm16;
}
/** norm16 is below minNoNo. */
UBool isCompYesAndZeroCC(uint16_t norm16) const {
    return minNoNo>norm16;
}
// UBool isCompYes(uint16_t norm16) const {
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
// }
// UBool isCompYesOrMaybe(uint16_t norm16) const {
// return norm16<minNoNo || minMaybeYes<=norm16;
// }
// UBool hasZeroCCFromDecompYes(uint16_t norm16) const {
// return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
// }
/**
 * True for norm16 below minYesNo, for JAMO_VT, and for the range
 * minMaybeYes..MIN_NORMAL_MAYBE_YES (inclusive at both ends).
 */
UBool isDecompYesAndZeroCC(uint16_t norm16) const {
    const UBool inMaybeYesRange=
        norm16>=minMaybeYes && norm16<=MIN_NORMAL_MAYBE_YES;
    return norm16<minYesNo || norm16==JAMO_VT || inMaybeYesRange;
}
/**
 * Cheaper approximation of isDecompYesAndZeroCC(): of the MaybeYes range it accepts
 * only the single value MIN_NORMAL_MAYBE_YES, so it misses the MaybeYes characters
 * which combine-forward and have ccc=0.
 * (Standard Unicode 5.2 normalization does not have such characters.)
 */
UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
    if(norm16<minYesNo) {
        return norm16<minYesNo;
    }
    return norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
}
/** norm16 is at or above limitNoNo. */
UBool isDecompNoAlgorithmic(uint16_t norm16) const {
    return limitNoNo<=norm16;
}
// For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) {
// return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
// }
uint8_t getCCFromNoNo(uint16_t norm16) const {
const uint16_t *mapping=getMapping(norm16);
if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) {
return (uint8_t)*(mapping-1);
} else {
return 0;
}
}
// Precondition: the character in [cpStart..cpLimit[ passes isCompYesAndZeroCC().
uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const;
/**
 * Applies the delta encoded in an algorithmic-NoNo norm16 value to c.
 * The delta is norm16 minus (minMaybeYes-MAX_DELTA-1).
 * Requires algorithmic-NoNo.
 */
UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
    const int32_t zeroDeltaNorm16=minMaybeYes-MAX_DELTA-1;
    const int32_t delta=norm16-zeroDeltaNorm16;
    return c+delta;
}
/**
 * Returns the address of extraData[norm16].
 * Requires minYesNo<norm16<limitNoNo.
 */
const uint16_t *getMapping(uint16_t norm16) const {
    return &extraData[norm16];
}
/**
 * Locates the compositions list for a norm16 value of a character without a decomposition.
 * @return NULL for norm16==0 and for norm16>=MIN_NORMAL_MAYBE_YES;
 *         a pointer into extraData for norm16<minMaybeYes;
 *         otherwise a pointer into maybeYesCompositions.
 */
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
    if(norm16==0 || norm16>=MIN_NORMAL_MAYBE_YES) {
        return NULL;
    }
    if(norm16>=minMaybeYes) {
        // MaybeYes: index relative to the start of the MaybeYes range.
        return &maybeYesCompositions[norm16-minMaybeYes];
    }
    // for yesYes; if Jamo L: harmless empty list
    return &extraData[norm16];
}
const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
const uint16_t *list=extraData+norm16; // composite has both mapping & compositions list
return list+ // mapping pointer
1+ // +1 to skip the first unit with the mapping lenth
(*list&MAPPING_LENGTH_MASK); // + mapping length
}
/**
 * Dispatches to the DecompYes or the composite variant depending on isDecompYes(norm16).
 * @param norm16 trie value of a code point that must have compositions
 * @return compositions list pointer
 */
const uint16_t *getCompositionsList(uint16_t norm16) const {
    if(isDecompYes(norm16)) {
        return getCompositionsListForDecompYes(norm16);
    }
    return getCompositionsListForComposite(norm16);
}
// Private helpers; all are declared here and defined out of line.
// NOTE(review): the one-line notes below are inferred from names and signatures only
// and should be confirmed against the implementation file.
// Presumably handles the leading part of a NUL-terminated string whose code points
// are all below minNeedDataCP.
const UChar *copyLowPrefixFromNulTerminated(const UChar *src,
UChar32 minNeedDataCP,
ReorderingBuffer *buffer,
UErrorCode &errorCode) const;
UBool decomposeShort(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
// Single-code-point overload of decompose(); takes the already looked-up norm16.
UBool decompose(UChar32 c, uint16_t norm16,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
// Static: works on a compositions list alone, without instance data.
static int32_t combine(const uint16_t *list, UChar32 trail);
void addComposites(const uint16_t *list, UnicodeSet &set) const;
void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
UBool onlyContiguous) const;
// Overload used by the inline hasCompBoundaryBefore(UChar32) for c>=minCompNoMaybeCP.
UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
int32_t getCanonValue(UChar32 c) const;
const UnicodeSet &getCanonStartSet(int32_t n) const;
// Instance data. NOTE(review): ownership and lifetime of the pointers below are
// established in code outside this view; confirm before changing.
UDataMemory *memory;
UVersionInfo dataVersion;
// Code point thresholds for quick check codes.
UChar32 minDecompNoCP;
UChar32 minCompNoMaybeCP; // below this, hasCompBoundaryBefore(c) is true without a data lookup
// Norm16 value thresholds for quick check combinations and types of extra data.
uint16_t minYesNo;
uint16_t minYesNoMappingsOnly;
uint16_t minNoNo;
uint16_t limitNoNo;
uint16_t minMaybeYes;
UTrie2 *normTrie;
const uint16_t *maybeYesCompositions; // indexed with norm16-minMaybeYes
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
SimpleSingleton canonIterDataSingleton;
};
// bits in canonIterData
// Layout of one 32-bit value: bit 31 and bit 30 are flags, bit 21 is the
// "has set" flag, and the low 21 bits (0x1fffff) carry the value itself.
#define CANON_NOT_SEGMENT_STARTER 0x80000000
#define CANON_HAS_COMPOSITIONS 0x40000000
#define CANON_HAS_SET 0x200000
#define CANON_VALUE_MASK 0x1fffff
/**
* ICU-internal shortcut for quick access to standard Unicode normalization.
* The class has only static members; its sole constructor is private and
* declared without a body here, so it cannot be instantiated.
*/
class U_COMMON_API Normalizer2Factory {
public:
// Accessors returning the public Normalizer2 interface. Each reports failure through errorCode.
static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);
static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);
static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);
static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
// Selects an instance by UNormalizationMode value.
static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);
// Accessors returning the underlying Normalizer2Impl rather than the Normalizer2 wrapper.
static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);
static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
// Get the Impl instance of the Normalizer2.
// Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
private:
Normalizer2Factory(); // No instantiation.
};
U_NAMESPACE_END
/**
 * NOTE(review): presumably the UDataSwapper callback that converts .nrm data between
 * platform byte orders/charsets; the definition is not in this header -- confirm.
 * Reads length bytes from inData, writes to outData, reports errors via pErrorCode.
 */
U_CAPI int32_t U_EXPORT2
unorm2_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
/**
* Get the NF*_QC property for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
/**
* Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC uint16_t
unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.
* Format version 2.0.
*
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
* Custom (application-specific) data can be built into additional .nrm files
* with the gennorm2 build tool.
*
* Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
* cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
*
* A .nrm file begins with a standard ICU data file header
* (DataHeader, see ucmndata.h and unicode/udata.h).
* The UDataInfo.dataVersion field usually contains the Unicode version
* for which the data was generated.
*
* After the header, the file contains the following parts.
* Constants are defined as enum values of the Normalizer2Impl class.
*
* Many details of the data structures are described in the design doc
* which is at http://site.icu-project.org/design/normalization/custom
*
* int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
*
* The first eight indexes are byte offsets in ascending order.
* Each byte offset marks the start of the next part in the data file,
* and the end of the previous one.
* When two consecutive byte offsets are the same, then the corresponding part is empty.
* Byte offsets are offsets from after the header,
* that is, from the beginning of the indexes[].
* Each part starts at an offset with proper alignment for its data.
* If necessary, the previous part may include padding bytes to achieve this alignment.
*
* minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP] is the lowest code point
* with a decomposition mapping, that is, with NF*D_QC=No.
* minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
* with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
*
* The next five indexes are thresholds of 16-bit trie values for ranges of
* values indicating multiple normalization properties.
* minYesNo=indexes[IX_MIN_YES_NO];
* minNoNo=indexes[IX_MIN_NO_NO];
* limitNoNo=indexes[IX_LIMIT_NO_NO];
* minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* See the normTrie description below and the design doc for details.
*
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
*
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits),
* information is extracted primarily via range checks.
* For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
* means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
* which means it has a two-way (round-trip) decomposition mapping.
* Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
* pointing to mappings, compositions lists, or both.
* Value norm16==0 means that the character is normalization-inert, that is,
* it does not have a mapping, does not participate in composition, has a zero
* canonical combining class, and forms a boundary where text before it and after it
* can be normalized independently.
* For details about how multiple properties are encoded in 16-bit values
* see the design doc.
* Note that the encoding cannot express all combinations of the properties involved;
* it only supports those combinations that are allowed by
* the Unicode Normalization algorithms. Details are in the design doc as well.
* The gennorm2 tool only builds .nrm files for data that conforms to the limitations.
*
* The trie has a value for each lead surrogate code unit representing the "worst case"
* properties of the 1024 supplementary characters whose UTF-16 form starts with
* the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
* then their lead surrogate code unit has the trie value 0.
* When the lead surrogate unit's value exceeds the quick check minimum during processing,
* the properties for the full supplementary code point need to be looked up.
*
* uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes];
* uint16_t extraData[];
*
* There is only one byte offset for the end of these two arrays.
* The split between them is given by the constant and variable mentioned above.
*
* The maybeYesCompositions array contains compositions lists for characters that
* combine both forward (as starters in composition pairs)
* and backward (as trailing characters in composition pairs).
* Such characters do not occur in Unicode 5.2 but are allowed by
* the Unicode Normalization algorithms.
* If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES
* and the maybeYesCompositions array is empty.
* If there are such characters, then minMaybeYes is subtracted from their norm16 values
* to get the index into this array.
*
* The extraData array contains compositions lists for "YesYes" characters,
* followed by mappings and optional compositions lists for "YesNo" characters,
* followed by only mappings for "NoNo" characters.
* (Referring to pairs of NFC/NFD quick check values.)
* The norm16 values of those characters are directly indexes into the extraData array.
*
* The data structures for compositions lists and mappings are described in the design doc.
*
* uint8_t smallFCD[0x100]; -- new in format version 2
*
* This is a bit set to help speed up FCD value lookups in the absence of a full
* UTrie2 or other large data structure with the full FCD value mapping.
*
* Each smallFCD bit is set if any of the corresponding 32 BMP code points
* has a non-zero FCD value (lccc!=0 or tccc!=0).
* Bit 0 of smallFCD[0] is for U+0000..U+001F. Bit 7 of smallFCD[0xff] is for U+FFE0..U+FFFF.
* A bit for 32 lead surrogates is set if any of the 32k corresponding
* _supplementary_ code points has a non-zero FCD value.
*
* This bit set is most useful for the large blocks of CJK characters with FCD=0.
*
* Changes from format version 1 to format version 2 ---------------------------
*
* - Addition of data for raw (not recursively decomposed) mappings.
* + The MAPPING_NO_COMP_BOUNDARY_AFTER bit in the extraData is now also set when
* the mapping is to an empty string or when the character combines-forward.
* This subsumes the one actual use of the MAPPING_PLUS_COMPOSITION_LIST bit which
* is then repurposed for the MAPPING_HAS_RAW_MAPPING bit.
* + For details see the design doc.
* - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into
* distinct ranges (combines-forward vs. not)
* so that a range check can be used to find out if there is a compositions list.
* This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
* It is needed for the new (in ICU 49) composePair(), not for other normalization.
* - Addition of the smallFCD[] bit set.
*/
#endif /* !UCONFIG_NO_NORMALIZATION */
#endif /* __NORMALIZER2IMPL_H__ */

Просмотреть файл

@ -0,0 +1,521 @@
/*
*************************************************************************
* COPYRIGHT:
* Copyright (c) 1996-2012, International Business Machines Corporation and
* others. All Rights Reserved.
*************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
#include "unicode/normlzr.h"
#include "unicode/utf16.h"
#include "cmemory.h"
#include "normalizer2impl.h"
#include "uprops.h" // for uniset_getUnicode32Instance()
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
//-------------------------------------------------------------------------
// Constructors and other boilerplate
//-------------------------------------------------------------------------
// Iterates over a UnicodeString: wraps str in a newly allocated StringCharacterIterator
// (released in the destructor), starts with options 0 and an empty buffer, then lets
// init() choose the Normalizer2 instance for the mode.
Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
text(new StringCharacterIterator(str)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init();
}
// Same as above for a UChar array of the given length, wrapped in a UCharCharacterIterator.
Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
text(new UCharCharacterIterator(str, length)),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init();
}
// Iterates over a private clone of the caller's CharacterIterator.
Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
text(iter.clone()),
currentIndex(0), nextIndex(0),
buffer(), bufferPos(0)
{
init();
}
// Copy constructor: copies mode, options, indexes and buffer state and clones the text
// iterator. The normalizer pointers are not copied; they start as NULL and init()
// sets them up again from the copied mode and options.
Normalizer::Normalizer(const Normalizer &copy) :
UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
text(copy.text->clone()),
currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
buffer(copy.buffer), bufferPos(copy.bufferPos)
{
init();
}
void
Normalizer::init() {
UErrorCode errorCode=U_ZERO_ERROR;
fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
if(fOptions&UNORM_UNICODE_3_2) {
delete fFilteredNorm2;
fNorm2=fFilteredNorm2=
new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
}
if(U_FAILURE(errorCode)) {
errorCode=U_ZERO_ERROR;
fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
}
}
// Releases the two heap objects held by this iterator: the filtered normalizer
// (if any) and the text iterator.
Normalizer::~Normalizer()
{
    delete fFilteredNorm2;
    delete text;
}
// Returns a heap-allocated copy made with the copy constructor.
Normalizer*
Normalizer::clone() const
{
    Normalizer *copy=new Normalizer(*this);
    return copy;
}
/**
 * Generates a hash code for this iterator by summing the hash codes of the
 * text and the buffer with the mode, the options and the three positions.
 */
int32_t Normalizer::hashCode() const
{
    int32_t hash=text->hashCode() + fUMode;
    hash=hash + fOptions;
    hash=hash + buffer.hashCode();
    hash=hash + bufferPos;
    hash=hash + currentIndex;
    hash=hash + nextIndex;
    return hash;
}
// Two iterators are equal if they are the same object, or if mode, options, text,
// buffer contents, buffer position and nextIndex all match.
// (currentIndex is not part of the comparison.)
UBool Normalizer::operator==(const Normalizer& that) const
{
    UBool same= this==&that;
    if(!same) {
        same=
            fUMode==that.fUMode &&
            fOptions==that.fOptions &&
            *text==*that.text &&
            buffer==that.buffer &&
            bufferPos==that.bufferPos &&
            nextIndex==that.nextIndex;
    }
    return same;
}
//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------
// Normalizes source into result according to mode and options.
// A bogus source or an incoming failure makes result bogus; a bogus source with a
// successful status additionally sets U_ILLEGAL_ARGUMENT_ERROR.
// source and result may be the same object.
void U_EXPORT2
Normalizer::normalize(const UnicodeString& source,
                      UNormalizationMode mode, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    if(U_FAILURE(status) || source.isBogus()) {
        result.setToBogus();
        if(U_SUCCESS(status)) {
            status=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return;
    }
    // When the source and result strings are the same object, write to a temporary one.
    UnicodeString scratch;
    const UBool aliased= &source==&result;
    UnicodeString &dest= aliased ? scratch : result;
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_SUCCESS(status)) {
        if(options&UNORM_UNICODE_3_2) {
            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
            filtered.normalize(source, dest, status);
        } else {
            n2->normalize(source, dest, status);
        }
    }
    if(aliased && U_SUCCESS(status)) {
        result=scratch;
    }
}
// Convenience wrapper around normalize(): NFKC when compat is set, NFC otherwise.
void U_EXPORT2
Normalizer::compose(const UnicodeString& source,
                    UBool compat, int32_t options,
                    UnicodeString& result,
                    UErrorCode &status) {
    const UNormalizationMode mode= compat ? UNORM_NFKC : UNORM_NFC;
    normalize(source, mode, options, result, status);
}
// Convenience wrapper around normalize(): NFKD when compat is set, NFD otherwise.
void U_EXPORT2
Normalizer::decompose(const UnicodeString& source,
                      UBool compat, int32_t options,
                      UnicodeString& result,
                      UErrorCode &status) {
    const UNormalizationMode mode= compat ? UNORM_NFKD : UNORM_NFD;
    normalize(source, mode, options, result, status);
}
// Runs the quick check of the normalizer selected by mode, filtered to
// Unicode 3.2 when UNORM_UNICODE_3_2 is set in options.
// Returns UNORM_MAYBE if the normalizer instance could not be obtained.
UNormalizationCheckResult
Normalizer::quickCheck(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    if(options&UNORM_UNICODE_3_2) {
        return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
            quickCheck(source, status);
    }
    return n2->quickCheck(source, status);
}
// Asks the normalizer selected by mode whether source is normalized, filtered to
// Unicode 3.2 when UNORM_UNICODE_3_2 is set in options.
// Returns FALSE if the normalizer instance could not be obtained.
UBool
Normalizer::isNormalized(const UnicodeString& source,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &status) {
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return FALSE;
    }
    if(options&UNORM_UNICODE_3_2) {
        return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
            isNormalized(source, status);
    }
    return n2->isNormalized(source, status);
}
// Sets result to left followed by right, appended through the normalizer selected by
// mode and options. A bogus input or an incoming failure makes result bogus; bogus
// input with a successful error code additionally sets U_ILLEGAL_ARGUMENT_ERROR.
// right and result may be the same object. Always returns result.
UnicodeString & U_EXPORT2
Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
                        UnicodeString &result,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || left.isBogus() || right.isBogus()) {
        result.setToBogus();
        if(U_SUCCESS(errorCode)) {
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        }
        return result;
    }
    // When the right and result strings are the same object, build in a temporary one.
    UnicodeString scratch;
    const UBool aliased= &right==&result;
    UnicodeString &dest= aliased ? scratch : result;
    dest=left;
    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
    if(U_SUCCESS(errorCode)) {
        if(options&UNORM_UNICODE_3_2) {
            FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(errorCode));
            filtered.append(dest, right, errorCode);
        } else {
            n2->append(dest, right, errorCode);
        }
    }
    if(aliased && U_SUCCESS(errorCode)) {
        result=scratch;
    }
    return result;
}
//-------------------------------------------------------------------------
// Iteration API
//-------------------------------------------------------------------------
/**
 * Return the current character in the normalized text without moving.
 * Normalizes the next segment first if the buffer is exhausted;
 * returns DONE if that produces nothing.
 */
UChar32 Normalizer::current() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    return buffer.char32At(bufferPos);
}
/**
 * Return the next character in the normalized text and advance
 * the iteration position past it. If the end
 * of the text has already been reached, {@link #DONE} is returned.
 */
UChar32 Normalizer::next() {
    if(bufferPos>=buffer.length() && !nextNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos);
    bufferPos+=U16_LENGTH(c);
    return c;
}
/**
 * Return the previous character in the normalized text and move
 * the iteration position back over it. If the beginning
 * of the text has already been reached, {@link #DONE} is returned.
 */
UChar32 Normalizer::previous() {
    if(bufferPos<=0 && !previousNormalize()) {
        return DONE;
    }
    const UChar32 c=buffer.char32At(bufferPos-1);
    bufferPos-=U16_LENGTH(c);
    return c;
}
// Moves the input iterator to its start, sets both indexes to that position
// and empties the output buffer.
void Normalizer::reset() {
    const int32_t start=text->setToStart();
    nextIndex=start;
    currentIndex=start;
    clearBuffer();
}
// Moves the input iterator to index (the iterator pins it), sets both indexes
// to the resulting position and empties the output buffer. Nothing is normalized.
void
Normalizer::setIndexOnly(int32_t index) {
    text->setIndex(index);  // pins index
    const int32_t pinned=text->getIndex();
    nextIndex=pinned;
    currentIndex=pinned;
    clearBuffer();
}
/**
 * Return the first character in the normalized text. This resets
 * the <tt>Normalizer's</tt> position to the beginning of the text
 * and then behaves like next().
 */
UChar32 Normalizer::first() {
    reset();
    const UChar32 c=next();
    return c;
}
/**
 * Return the last character in the normalized text. This resets
 * the <tt>Normalizer's</tt> position to be just before
 * the input text corresponding to that normalized character.
 */
UChar32 Normalizer::last() {
    const int32_t end=text->setToEnd();
    nextIndex=end;
    currentIndex=end;
    clearBuffer();
    return previous();
}
/**
 * Retrieve the current iteration position in the input text that is
 * being normalized. This is useful in applications such as searching,
 * where the position in the input text that corresponds to a given
 * normalized output character is needed.
 * <p>
 * <b>Note:</b> The position refers to the <em>input</em>, while
 * {@link #next} and {@link #previous} iterate through characters in the
 * <em>output</em>. There is not necessarily a one-to-one
 * correspondence between characters returned by <tt>next</tt> and
 * <tt>previous</tt> and the indices passed to and returned from
 * <tt>setIndex</tt> and {@link #getIndex}.
 *
 * @return currentIndex while unread characters remain in the buffer, else nextIndex
 */
int32_t Normalizer::getIndex() const {
    return bufferPos<buffer.length() ? currentIndex : nextIndex;
}
/**
 * Retrieve the index of the start of the input text, as reported by the
 * underlying <tt>CharacterIterator</tt> (0 when iterating over a <tt>String</tt>).
 */
int32_t Normalizer::startIndex() const {
    const int32_t begin=text->startIndex();
    return begin;
}
/**
 * Retrieve the index of the end of the input text, as reported by the
 * underlying <tt>CharacterIterator</tt> (the length when iterating over a <tt>String</tt>).
 */
int32_t Normalizer::endIndex() const {
    const int32_t end=text->endIndex();
    return end;
}
//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------
// Stores the new mode, then re-runs init() so that fNorm2 matches it.
void
Normalizer::setMode(UNormalizationMode newMode)
{
    fUMode=newMode;
    init();
}
// Returns the normalization mode currently in effect.
UNormalizationMode
Normalizer::getUMode() const
{
    const UNormalizationMode mode=fUMode;
    return mode;
}
// Sets (value!=0) or clears (value==0) the given option bit(s) in fOptions,
// then re-runs init() so that fNorm2 matches the new options.
void
Normalizer::setOption(int32_t option,
                      UBool value)
{
    fOptions= value ? (fOptions | option) : (fOptions & (~option));
    init();
}
// Returns non-zero if any of the given option bit(s) is set in fOptions.
UBool
Normalizer::getOption(int32_t option) const
{
    const int32_t masked=fOptions & option;
    return masked != 0;
}
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the input text.
*/
void
Normalizer::setText(const UnicodeString& newText,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = new StringCharacterIterator(newText);
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
reset();
}
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the string.
*/
void
Normalizer::setText(const CharacterIterator& newText,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = newText.clone();
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
reset();
}
void
Normalizer::setText(const UChar* newText,
int32_t length,
UErrorCode &status)
{
if (U_FAILURE(status)) {
return;
}
CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
if (newIter == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
delete text;
text = newIter;
reset();
}
/**
 * Copies the text under iteration into the UnicodeString referred to by "result"
 * by delegating to the underlying character iterator.
 * @param result Receives a copy of the text under iteration.
 */
void
Normalizer::getText(UnicodeString& result)
{
    CharacterIterator &source=*text;
    source.getText(result);
}
//-------------------------------------------------------------------------
// Private utility methods
//-------------------------------------------------------------------------
// Empties the output buffer and rewinds the read position within it.
void Normalizer::clearBuffer() {
    bufferPos=0;
    buffer.remove();
}
// Normalizes the input segment that starts at nextIndex into the (cleared) buffer.
// The segment is the first code point plus all following code points up to, but not
// including, the next one for which fNorm2 reports a boundary before it.
// Afterwards currentIndex is the segment start and nextIndex the segment end.
// Returns FALSE at the end of the input, on a normalization error,
// or if the normalized segment is empty.
UBool
Normalizer::nextNormalize() {
    clearBuffer();
    currentIndex=nextIndex;
    text->setIndex(nextIndex);
    if(!text->hasNext()) {
        return FALSE;
    }
    // Skip at least one character so we make progress.
    UnicodeString segment(text->next32PostInc());
    UBool boundaryFound=FALSE;
    while(!boundaryFound && text->hasNext()) {
        const UChar32 c=text->next32PostInc();
        boundaryFound=fNorm2->hasBoundaryBefore(c);
        if(boundaryFound) {
            // c belongs to the next segment: step back over it.
            text->move32(-1, CharacterIterator::kCurrent);
        } else {
            segment.append(c);
        }
    }
    nextIndex=text->getIndex();
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}
// Normalizes the input segment that ends at currentIndex into the (cleared) buffer.
// Code points are collected backwards until one is reached for which fNorm2 reports
// a boundary before it; that code point is included in the segment.
// Afterwards nextIndex is the segment end, currentIndex the segment start and
// bufferPos is placed at the end of the buffer.
// Returns FALSE at the start of the input, on a normalization error,
// or if the normalized segment is empty.
UBool
Normalizer::previousNormalize() {
    clearBuffer();
    nextIndex=currentIndex;
    text->setIndex(currentIndex);
    if(!text->hasPrevious()) {
        return FALSE;
    }
    UnicodeString segment;
    UBool atBoundary=FALSE;
    while(!atBoundary && text->hasPrevious()) {
        const UChar32 c=text->previous32();
        segment.insert(0, c);
        atBoundary=fNorm2->hasBoundaryBefore(c);
    }
    currentIndex=text->getIndex();
    UErrorCode errorCode=U_ZERO_ERROR;
    fNorm2->normalize(segment, buffer, errorCode);
    bufferPos=buffer.length();
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    return !buffer.isEmpty();
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_NORMALIZATION */

Просмотреть файл

@ -0,0 +1,21 @@
/*
**********************************************************************
* Copyright (C) 2003-2003, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/parsepos.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition)
// Nothing to release; the destructor is defined here, out of line.
ParsePosition::~ParsePosition() {
}
// Returns a heap-allocated copy made with the copy constructor.
ParsePosition *
ParsePosition::clone() const {
    ParsePosition *copy=new ParsePosition(*this);
    return copy;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,218 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: patternprops.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar13
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "patternprops.h"
U_NAMESPACE_BEGIN
/*
* One byte per Latin-1 character.
* Bit 0 is set if either Pattern property is true,
* bit 1 if Pattern_Syntax is true,
* bit 2 if Pattern_White_Space is true.
* That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
*
* The table is laid out as 16 rows of 16 entries, so row r covers
* U+00r0..U+00rF; the comment above a row lists the non-zero entries in it.
*/
static const uint8_t latin1[256]={
// WS: 9..D
0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// WS: 20 Syntax: 21..2F
5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
// Syntax: 3A..40
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: 5B..5E
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
// Syntax: 60
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: 7B..7E
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
// WS: 85
0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: A1..A7, A9, AB, AC, AE
0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
// Syntax: B0, B1, B6, BB, BF
3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: D7
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: F7
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
* One byte per 32 characters from U+2000..U+303F indexing into
* a small table of 32-bit data words.
* The first two data words are all-zeros and all-ones.
*/
// Looked up as index2000[(c-0x2000)>>5]; the result selects a word in
// syntax2000[] or syntaxOrWhiteSpace2000[].
// 130 entries: 8 per 0x100 code points for 2000..2FFF (128), plus 2 for 3000..303F.
static const uint8_t index2000[130]={
2, 3, 4, 0, 0, 0, 0, 0, // 20xx
0, 0, 0, 0, 5, 1, 1, 1, // 21xx
1, 1, 1, 1, 1, 1, 1, 1, // 22xx
1, 1, 1, 1, 1, 1, 1, 1, // 23xx
1, 1, 1, 0, 0, 0, 0, 0, // 24xx
1, 1, 1, 1, 1, 1, 1, 1, // 25xx
1, 1, 1, 1, 1, 1, 1, 1, // 26xx
1, 1, 1, 6, 7, 1, 1, 1, // 27xx
1, 1, 1, 1, 1, 1, 1, 1, // 28xx
1, 1, 1, 1, 1, 1, 1, 1, // 29xx
1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
8, 9 // 3000..303F
};
/*
* One 32-bit integer per 32 characters. Ranges of all-false and all-true
* are mapped to the first two values, other ranges map to appropriate bit patterns.
*/
// Selected via index2000[]; bit (c&0x1f) of the selected word is the
// Pattern_Syntax flag for code point c (see isSyntax()).
// The trailing comments give the word's index and the code points whose bits are set.
static const uint32_t syntax2000[]={
0,
0xffffffff,
0xffff0000, // 2: 2010..201F
0x7fff00ff, // 3: 2020..2027, 2030..203E
0x7feffffe, // 4: 2041..2053, 2055..205E
0xffff0000, // 5: 2190..219F
0x003fffff, // 6: 2760..2775
0xfff00000, // 7: 2794..279F
0xffffff0e, // 8: 3001..3003, 3008..301F
0x00010001 // 9: 3020, 3030
};
/*
* Same as syntax2000, but with additional bits set for the
* Pattern_White_Space characters 200E 200F 2028 2029.
*/
// Selected via index2000[]; bit (c&0x1f) of the selected word tells whether
// code point c is Pattern_Syntax or Pattern_White_Space (see isSyntaxOrWhiteSpace()).
// Only words 2 and 3 differ from syntax2000[].
static const uint32_t syntaxOrWhiteSpace2000[]={
0,
0xffffffff,
0xffffc000, // 2: 200E..201F
0x7fff03ff, // 3: 2020..2029, 2030..203E
0x7feffffe, // 4: 2041..2053, 2055..205E
0xffff0000, // 5: 2190..219F
0x003fffff, // 6: 2760..2775
0xfff00000, // 7: 2794..279F
0xffffff0e, // 8: 3001..3003, 3008..301F
0x00010001 // 9: 3020, 3030
};
/*
 * Pattern_Syntax test.
 * Latin-1 is answered from latin1[] (bit 1), U+2010..U+3030 from the
 * index2000[]/syntax2000[] bit sets, and the four code points
 * U+FD3E, U+FD3F, U+FE45, U+FE46 are checked explicitly.
 */
UBool
PatternProps::isSyntax(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        return (UBool)((latin1[c]>>1)&1);
    }
    if(c<0x2010) {
        return FALSE;
    }
    if(c<=0x3030) {
        // One 32-bit word per block of 32 code points starting at U+2000.
        int32_t block=(c-0x2000)>>5;
        uint32_t word=syntax2000[index2000[block]];
        return (UBool)((word>>(c&0x1f))&1);
    }
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
/*
 * Combined Pattern_Syntax / Pattern_White_Space test.
 * Latin-1 is answered from latin1[] (bit 0), U+200E..U+3030 from the
 * index2000[]/syntaxOrWhiteSpace2000[] bit sets, and the four code points
 * U+FD3E, U+FD3F, U+FE45, U+FE46 are checked explicitly.
 */
UBool
PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        return (UBool)(latin1[c]&1);
    }
    if(c<0x200e) {
        return FALSE;
    }
    if(c<=0x3030) {
        // One 32-bit word per block of 32 code points starting at U+2000.
        int32_t block=(c-0x2000)>>5;
        uint32_t word=syntaxOrWhiteSpace2000[index2000[block]];
        return (UBool)((word>>(c&0x1f))&1);
    }
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
/*
 * Pattern_White_Space test.
 * Latin-1 is answered from latin1[] (bit 2); beyond Latin-1 only
 * U+200E, U+200F, U+2028 and U+2029 qualify.
 */
UBool
PatternProps::isWhiteSpace(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        return (UBool)((latin1[c]>>2)&1);
    }
    return (UBool)(c==0x200e || c==0x200f || c==0x2028 || c==0x2029);
}
/*
 * Advances past leading Pattern_White_Space.
 * Examines at most length code units; returns s unchanged when length<=0.
 */
const UChar *
PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
    for(; length>0; ++s, --length) {
        if(!isWhiteSpace(*s)) {
            break;
        }
    }
    return s;
}
/*
 * Strips leading and trailing Pattern_White_Space.
 * length is in/out: on return it holds the length of the trimmed range.
 * Returns a pointer to the first retained code unit (s itself if nothing
 * had to be trimmed or length<=0).
 */
const UChar *
PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
    if(length<=0) {
        return s;
    }
    // Fast path: nothing to trim at either end.
    if(!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1])) {
        return s;
    }
    const UChar *first=s;
    const UChar *end=s+length;
    while(first<end && isWhiteSpace(*first)) {
        ++first;
    }
    if(first<end) {
        // *first is not white space, so the backward scan stops at or
        // after it without needing its own bounds check.
        while(isWhiteSpace(*(end-1))) {
            --end;
        }
    }
    length=(int32_t)(end-first);
    return first;
}
/*
 * Returns TRUE if none of the length code units at s is Pattern_Syntax or
 * Pattern_White_Space. An empty or negative-length string is not an identifier.
 */
UBool
PatternProps::isIdentifier(const UChar *s, int32_t length) {
    if(length<=0) {
        return FALSE;
    }
    for(int32_t i=0; i<length; ++i) {
        if(isSyntaxOrWhiteSpace(s[i])) {
            return FALSE;
        }
    }
    return TRUE;
}
/*
 * Advances past code units that are neither Pattern_Syntax nor
 * Pattern_White_Space. Examines at most length code units; returns s
 * unchanged when length<=0.
 */
const UChar *
PatternProps::skipIdentifier(const UChar *s, int32_t length) {
    for(; length>0; ++s, --length) {
        if(isSyntaxOrWhiteSpace(*s)) {
            break;
        }
    }
    return s;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,89 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: patternprops.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar13
* created by: Markus W. Scherer
*/
#ifndef __PATTERNPROPS_H__
#define __PATTERNPROPS_H__
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
/**
* Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
* Hardcodes these properties, does not load data, does not depend on other ICU classes.
* <p>
* Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
* and both properties only include BMP code points (no supplementary ones).
* Pattern_Syntax includes some unassigned code points.
* <p>
* [:Pattern_White_Space:] =
* [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
* <p>
* [:Pattern_Syntax:] =
* [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
* \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
* \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
* \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
* \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
* @author mscherer
*/
class U_COMMON_API PatternProps {
public:
/**
* @param c code point to test; negative values yield FALSE.
* @return TRUE if c is a Pattern_Syntax code point.
*/
static UBool isSyntax(UChar32 c);
/**
* @param c code point to test; negative values yield FALSE.
* @return TRUE if c is a Pattern_Syntax or Pattern_White_Space code point.
*/
static UBool isSyntaxOrWhiteSpace(UChar32 c);
/**
* @param c code point to test; negative values yield FALSE.
* @return TRUE if c is a Pattern_White_Space character.
*/
static UBool isWhiteSpace(UChar32 c);
/**
* Skips over Pattern_White_Space starting at s.
* @param s start of the text
* @param length number of UChars that may be examined; if length<=0, s is returned unchanged
* @return The smallest pointer at or after s with a non-white space character.
*/
static const UChar *skipWhiteSpace(const UChar *s, int32_t length);
/**
* @param s start of the text
* @param length in/out: number of UChars at s on input, length of the trimmed text on output
* @return s except with leading and trailing Pattern_White_Space removed and length adjusted.
*/
static const UChar *trimWhiteSpace(const UChar *s, int32_t &length);
/**
* Tests whether the string contains a "pattern identifier", that is,
* whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
* @param s start of the text
* @param length number of UChars at s; FALSE is returned if length<=0
* @return TRUE if there are no Pattern_White_Space or Pattern_Syntax characters in s.
*/
static UBool isIdentifier(const UChar *s, int32_t length);
/**
* Skips over a "pattern identifier" starting at index s.
* @param s start of the text
* @param length number of UChars that may be examined; if length<=0, s is returned unchanged
* @return The smallest pointer at or after s with
* a Pattern_White_Space or Pattern_Syntax character.
*/
static const UChar *skipIdentifier(const UChar *s, int32_t length);
private:
// Private and only declared here, so that the class is not instantiated by users.
PatternProps(); // no constructor: all static methods
};
U_NAMESPACE_END
#endif // __PATTERNPROPS_H__

Просмотреть файл

@ -0,0 +1,327 @@
/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
**********************************************************************
*/
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
#include "unicode/uscript.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "ucln_cmn.h"
#include "uarrsort.h"
#include "uinvchar.h"
#define INCLUDED_FROM_PROPNAME_CPP
#include "propname_data.h"
U_CDECL_BEGIN
/**
* Get the next non-ignorable ASCII character from a property name
* and lowercases it.
* @return ((advance count for the name)<<8)|character
*/
static inline int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;
    /* Step over the delimiters '-', '_', and ASCII White_Space. */
    do {
        ch=name[consumed++];
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));
    /* consumed also counts the character (or terminating NUL) that ended the scan. */
    int32_t advance=consumed<<8;
    if(ch==0) {
        return advance;
    }
    return advance|(uint8_t)uprv_asciitolower((char)ch);
}
/**
* Get the next non-ignorable EBCDIC character from a property name
* and lowercases it.
* @return ((advance count for the name)<<8)|character
*/
static inline int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;
    /* Step over the delimiters '-', '_', and EBCDIC White_Space. */
    do {
        ch=name[consumed++];
    } while(ch==0x60 || ch==0x6d ||
            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
            ch==0x0b || ch==0x0c || ch==0x0d);
    /* consumed also counts the character (or terminating NUL) that ended the scan. */
    int32_t advance=consumed<<8;
    if(ch==0) {
        return advance;
    }
    return advance|(uint8_t)uprv_ebcdictolower((char)ch);
}
/**
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* Each result packs (advance count<<8)|lowercased character. */
        int32_t next1=getASCIIPropertyNameChar(name1);
        int32_t next2=getASCIIPropertyNameChar(name2);
        int32_t c1=next1&0xff;
        int32_t c2=next2&0xff;

        /* Both strings ended at the same time: they match. */
        if((c1|c2)==0) {
            return 0;
        }
        /* The first differing lowercased character decides the order. */
        if(c1!=c2) {
            return c1-c2;
        }
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
/**
 * EBCDIC counterpart of uprv_compareASCIIPropertyNames():
 * loose comparison that skips the delimiters recognized by
 * getEBCDICPropertyNameChar() and compares lowercased characters.
 *
 * @return 0 if the names match loosely, otherwise the difference of the
 *         first pair of differing lowercased characters
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    for(;;) {
        /* Each result packs (advance count<<8)|lowercased character. */
        int32_t next1=getEBCDICPropertyNameChar(name1);
        int32_t next2=getEBCDICPropertyNameChar(name2);
        int32_t c1=next1&0xff;
        int32_t c2=next2&0xff;

        /* Both strings ended at the same time: they match. */
        if((c1|c2)==0) {
            return 0;
        }
        /* The first differing lowercased character decides the order. */
        if(c1!=c2) {
            return c1-c2;
        }
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
U_CDECL_END
U_NAMESPACE_BEGIN
/*
 * Looks up a property in the range table at the start of valueMaps[].
 * Returns the valueMaps[] index of the property's two-word entry,
 * or 0 if the property is not covered by any range.
 */
int32_t PropNameData::findProperty(int32_t property) {
    int32_t rangesLeft=valueMaps[0];
    int32_t pos=1;  // first word after the range count
    while(rangesLeft>0) {
        int32_t rangeStart=valueMaps[pos];
        int32_t rangeLimit=valueMaps[pos+1];
        pos+=2;  // now at the first entry of this range
        if(property<rangeStart) {
            // The search stops at the first range that starts above the property.
            return 0;
        }
        if(property<rangeLimit) {
            return pos+(property-rangeStart)*2;
        }
        // Two words per property in the range.
        pos+=(rangeLimit-rangeStart)*2;
        --rangesLeft;
    }
    return 0;
}
/*
 * Finds the name group offset for a property value.
 * valueMapIndex is the valueMaps[] index of the property's value map
 * (0 means the property has no named values).
 * Returns the value read from valueMaps[] for this value, or 0 if not found.
 */
int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
    if(valueMapIndex==0) {
        return 0;  // The property does not have named values.
    }
    int32_t pos=valueMapIndex+1;  // Skip the BytesTrie offset.
    int32_t numRanges=valueMaps[pos++];
    if(numRanges>=0x10) {
        // List form: (numRanges-0x10) values followed by as many offsets.
        int32_t numValues=numRanges-0x10;
        int32_t valuesStart=pos;
        int32_t offsetsStart=pos+numValues;
        int32_t k=0;
        do {
            int32_t candidate=valueMaps[valuesStart+k];
            if(value<candidate) {
                return 0;
            }
            if(value==candidate) {
                return valueMaps[offsetsStart+k];
            }
        } while(++k<numValues);
        return 0;
    }
    // Range form: numRanges ranges, each followed by one offset per value.
    while(numRanges>0) {
        int32_t rangeStart=valueMaps[pos];
        int32_t rangeLimit=valueMaps[pos+1];
        pos+=2;
        if(value<rangeStart) {
            return 0;
        }
        if(value<rangeLimit) {
            return valueMaps[pos+value-rangeStart];
        }
        pos+=rangeLimit-rangeStart;  // Skip all entries for this range.
        --numRanges;
    }
    return 0;
}
/*
 * Returns the nameIndex-th name of a name group, or NULL if nameIndex is
 * out of range or the selected name is the empty string.
 * The group starts with a count byte followed by NUL-terminated strings.
 */
const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
    int32_t numNames=nameGroup[0];
    if(nameIndex<0 || numNames<=nameIndex) {
        return NULL;
    }
    const char *current=nameGroup+1;
    // Step over nameIndex NUL-terminated names.
    while(nameIndex>0) {
        current=uprv_strchr(current, 0)+1;
        --nameIndex;
    }
    // An empty string means no name (Property[Value]Aliases.txt has "n/a").
    return *current==0 ? NULL : current;
}
/*
 * Feeds the loosely-normalized name (lowercased, without '-', '_' and
 * ASCII White_Space) into the trie.
 * Returns whether the trie reports a value after the last character;
 * the trie is left in the resulting state.
 */
UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    if(name==NULL) {
        return FALSE;
    }
    UStringTrieResult state=USTRINGTRIE_NO_VALUE;
    for(const char *p=name; *p!=0; ++p) {
        char lower=uprv_invCharToLowercaseAscii(*p);
        // Ignore delimiters '-', '_', and ASCII White_Space.
        UBool ignorable=
            lower==0x2d || lower==0x5f || lower==0x20 || (0x09<=lower && lower<=0x0d);
        if(ignorable) {
            continue;
        }
        if(!USTRINGTRIE_HAS_NEXT(state)) {
            return FALSE;
        }
        state=trie.next((uint8_t)lower);
    }
    return USTRINGTRIE_HAS_VALUE(state);
}
/*
 * Returns the name of a property selected by nameChoice,
 * or NULL if the property is unknown or has no such name.
 */
const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
    int32_t entry=findProperty(property);
    if(entry!=0) {
        // The first word of the entry is the offset into nameGroups[].
        return getName(nameGroups+valueMaps[entry], nameChoice);
    }
    return NULL;  // Not a known property.
}
/*
 * Returns the name of a property value selected by nameChoice,
 * or NULL if the property or the value is unknown or has no such name.
 */
const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
    int32_t entry=findProperty(property);
    if(entry!=0) {
        // The second word of the entry is the index of the property's value map.
        int32_t groupOffset=findPropertyValueNameGroup(valueMaps[entry+1], value);
        if(groupOffset!=0) {
            return getName(nameGroups+groupOffset, nameChoice);
        }
    }
    return NULL;
}
/*
 * Looks up an alias in the BytesTrie that starts at bytesTries+bytesTrieOffset.
 * Returns the trie's value for the alias, or UCHAR_INVALID_CODE if
 * the alias is not found.
 */
int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    BytesTrie trie(bytesTries+bytesTrieOffset);
    if(!containsName(trie, alias)) {
        return UCHAR_INVALID_CODE;
    }
    return trie.getValue();
}
/*
 * Maps a property alias to its enum value using the trie at offset 0 of
 * bytesTries[]; the result is whatever getPropertyOrValueEnum() returns
 * (UCHAR_INVALID_CODE if the alias is not found).
 */
int32_t PropNameData::getPropertyEnum(const char *alias) {
return getPropertyOrValueEnum(0, alias);
}
/*
 * Maps a property value alias to its value for the given property.
 * Returns UCHAR_INVALID_CODE if the property is unknown, has no named
 * values, or the alias is not found.
 */
int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
    int32_t entry=findProperty(property);
    if(entry!=0) {
        int32_t valueMapStart=valueMaps[entry+1];
        if(valueMapStart!=0) {
            // The first word of the property's value map is its BytesTrie offset.
            return getPropertyOrValueEnum(valueMaps[valueMapStart], alias);
        }
        // valueMapStart==0: the property does not have named values.
    }
    return UCHAR_INVALID_CODE;
}
U_NAMESPACE_END
//----------------------------------------------------------------------
// Public API implementation
/* C API wrapper: forwards to PropNameData::getPropertyName() and returns its result unchanged. */
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
UPropertyNameChoice nameChoice) {
U_NAMESPACE_USE
return PropNameData::getPropertyName(property, nameChoice);
}
/* C API wrapper: forwards to PropNameData::getPropertyEnum() and casts the int32_t result to UProperty. */
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
U_NAMESPACE_USE
return (UProperty)PropNameData::getPropertyEnum(alias);
}
/* C API wrapper: forwards to PropNameData::getPropertyValueName() and returns its result unchanged. */
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
int32_t value,
UPropertyNameChoice nameChoice) {
U_NAMESPACE_USE
return PropNameData::getPropertyValueName(property, value, nameChoice);
}
/* C API wrapper: forwards to PropNameData::getPropertyValueEnum() and returns its result unchanged. */
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
const char* alias) {
U_NAMESPACE_USE
return PropNameData::getPropertyValueEnum(property, alias);
}
/* Returns the long (U_LONG_PROPERTY_NAME) name of a UCHAR_SCRIPT property value via u_getPropertyValueName(). */
U_CAPI const char* U_EXPORT2
uscript_getName(UScriptCode scriptCode){
return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
U_LONG_PROPERTY_NAME);
}
/* Returns the short (U_SHORT_PROPERTY_NAME) name of a UCHAR_SCRIPT property value via u_getPropertyValueName(). */
U_CAPI const char* U_EXPORT2
uscript_getShortName(UScriptCode scriptCode){
return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
U_SHORT_PROPERTY_NAME);
}

Просмотреть файл

@ -0,0 +1,210 @@
/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: October 30 2002
* Since: ICU 2.4
* 2010nov19 Markus Scherer Rewrite for formatVersion 2.
**********************************************************************
*/
#ifndef PROPNAME_H
#define PROPNAME_H
#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/uchar.h"
#include "udataswp.h"
#include "uprops.h"
/*
* This header defines the in-memory layout of the property names data
* structure representing the UCD data files PropertyAliases.txt and
* PropertyValueAliases.txt. It is used by:
* propname.cpp - reads data
* genpname - creates data
*/
/* low-level char * property name comparison -------------------------------- */
U_CDECL_BEGIN
/**
* \var uprv_comparePropertyNames
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
/**
 * EBCDIC-charset variant of the loose property name comparison;
 * selected as uprv_comparePropertyNames when U_CHARSET_FAMILY==U_EBCDIC_FAMILY.
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
#else
# error U_CHARSET_FAMILY is not valid
#endif
U_CDECL_END
/* UDataMemory structure and signatures ------------------------------------- */
#define PNAME_DATA_NAME "pnames"
#define PNAME_DATA_TYPE "icu"
/* Fields in UDataInfo: */
/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */
#define PNAME_SIG_0 ((uint8_t)0x70) /* p */
#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */
#define PNAME_SIG_2 ((uint8_t)0x61) /* a */
#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */
U_NAMESPACE_BEGIN
/**
 * Static accessors for the hardcoded property names data
 * (indexes[], valueMaps[], bytesTries[], nameGroups[]).
 * All members are static.
 */
class PropNameData {
public:
// Positions in indexes[].
enum {
// Byte offsets from the start of the data, after the generic header.
IX_VALUE_MAPS_OFFSET,
IX_BYTE_TRIES_OFFSET,
IX_NAME_GROUPS_OFFSET,
IX_RESERVED3_OFFSET,
IX_RESERVED4_OFFSET,
IX_TOTAL_SIZE,
// Other values.
IX_MAX_NAME_LENGTH,
IX_RESERVED7,
IX_COUNT
};
// Returns the name of a property selected by nameChoice, or NULL if there is none.
static const char *getPropertyName(int32_t property, int32_t nameChoice);
// Returns the name of a property value selected by nameChoice, or NULL if there is none.
static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice);
// Returns the property enum for an alias, or UCHAR_INVALID_CODE if the alias is not found.
static int32_t getPropertyEnum(const char *alias);
// Returns the value for a property value alias, or UCHAR_INVALID_CODE if not found.
static int32_t getPropertyValueEnum(int32_t property, const char *alias);
private:
// Returns the valueMaps[] index of the property's entry, or 0 if unknown.
static int32_t findProperty(int32_t property);
// Returns the nameGroups[] offset for a property value, or 0 if not found.
static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
// Returns the nameIndex-th name of a name group, or NULL if absent or empty.
static const char *getName(const char *nameGroup, int32_t nameIndex);
// Feeds the loosely-normalized name into the trie; TRUE if it ends on a value.
static UBool containsName(BytesTrie &trie, const char *name);
// Looks up an alias in the trie at bytesTries+bytesTrieOffset.
static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias);
// Data arrays; this header only declares them.
static const int32_t indexes[];
static const int32_t valueMaps[];
static const uint8_t bytesTries[];
static const char nameGroups[];
};
/*
* pnames.icu formatVersion 2
*
* formatVersion 2 is new in ICU 4.8.
* In ICU 4.8, the pnames.icu data file is used only in ICU4J.
* ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
*
* For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
* or earlier versions of this header file (source/common/propname.h).
*
* The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
* After that:
*
* int32_t indexes[8];
*
* (See the PropNameData::IX_... constants.)
*
* The first 6 indexes are byte offsets from the beginning of the data
* (beginning of indexes[]) to following structures.
* The length of each structure is the difference between its offset
* and the next one.
* All offsets are filled in: Where there is no data between two offsets,
* those two offsets are the same.
* The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
* total number of bytes in the file. (Not counting the standard headers.)
*
* The seventh index (indexes[PropNameData::IX_MAX_NAME_LENGTH], position 6) has the
* maximum length of any Unicode property (or property value) alias.
* (Without normalization, that is, including underscores etc.)
*
* int32_t valueMaps[];
*
* The valueMaps[] begins with a map from UProperty enums to properties,
* followed by the per-property value maps from property values to names,
* for those properties that have named values.
* (Binary & enumerated, plus General_Category_Mask.)
*
* valueMaps[0] contains the number of UProperty enum ranges.
* For each range:
* int32_t start, limit -- first and last+1 UProperty enum of a dense range
* Followed by (limit-start) pairs of
* int32_t nameGroupOffset;
* Offset into nameGroups[] for the property's names/aliases.
* int32_t valueMapIndex;
* Offset of the property's value map in the valueMaps[] array.
* If the valueMapIndex is 0, then the property does not have named values.
*
* For each property's value map:
* int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping.
* int32_t numRanges;
* If numRanges is in the range 1..15, then that many ranges of values follow.
* Per range:
* int32_t start, limit -- first and last+1 property value of a range
* Followed by (limit-start) entries of
* int32_t nameGroupOffset;
* Offset into nameGroups[] for the property value's names/aliases.
* If the nameGroupOffset is 0, then this is not a named value for this property.
* (That is, the ranges need not be dense.)
* If numRanges is >=0x10, then (numRanges-0x10) sorted values
* and then (numRanges-0x10) corresponding nameGroupOffsets follow.
* Values are sorted as signed integers.
* In this case, the set of values is dense; no nameGroupOffset will be 0.
*
* For both properties and property values, ranges are sorted by their start/limit values.
*
* uint8_t bytesTries[];
*
* This is a sequence of BytesTrie structures, byte-serialized tries for
* mapping from names/aliases to values.
* The first one maps from property names/aliases to UProperty enum constants.
* The following ones are indexed by property value map bytesTrieOffsets
* for mapping each property's names/aliases to their property values.
*
* char nameGroups[];
*
* This is a sequence of property name groups.
* Each group is a list of names/aliases (invariant-character strings) for
* one property or property value, in the order of UPropertyNameChoice.
* The first byte of each group is the number of names in the group.
* It is followed by that many NUL-terminated strings.
* The first string is for the short name; if there is no short name,
* then the first string is empty.
* The second string is the long name. Further strings are additional aliases.
*
* The first name group is for a property rather than a property value,
* so that a nameGroupOffset of 0 can be used to indicate "no value"
* in a property's sparse value ranges.
*/
U_NAMESPACE_END
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,525 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: propsvec.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store bits (Unicode character properties) in bit set vectors.
*/
#include <stdlib.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "utrie.h"
#include "utrie2.h"
#include "uarrsort.h"
#include "propsvec.h"
#include "uassert.h"
/*
 * Builder state for a table of property vectors.
 * The table is a flat array of rows; each row has `columns` uint32_t cells:
 * cell 0 is the range start, cell 1 is the range limit (end+1),
 * and the remaining cells are the values for that range.
 */
struct UPropsVectors {
uint32_t *v; /* row storage, allocated for maxRows*columns cells */
int32_t columns; /* number of columns, plus two for start & limit values */
int32_t maxRows; /* number of rows that fit into v */
int32_t rows; /* number of rows currently in use */
int32_t prevRow; /* search optimization: remember last row seen */
UBool isCompacted; /* set by upvec_compact(); setters and getters then refuse to work */
};
#define UPVEC_INITIAL_ROWS (1<<12)
#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
/*
 * Allocates a new vectors builder with `columns` value columns.
 * The table starts with one row covering 0..0x10ffff followed by one
 * single-code-point row for each special value
 * UPVEC_FIRST_SPECIAL_CP..UPVEC_MAX_CP; all value cells are zero.
 *
 * Returns NULL and sets *pErrorCode on failure
 * (U_ILLEGAL_ARGUMENT_ERROR for columns<1, U_MEMORY_ALLOCATION_ERROR
 * if an allocation fails). Does nothing if *pErrorCode already
 * indicates a failure.
 */
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode) {
    UPropsVectors *vectors;
    uint32_t *cells, *cursor;
    uint32_t special;
    int32_t width;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(columns<1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /* each row also stores its range start and limit */
    width=columns+2;

    vectors=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors));
    cells=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*width*4);
    if(vectors==NULL || cells==NULL) {
        uprv_free(vectors);
        uprv_free(cells);
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    uprv_memset(vectors, 0, sizeof(UPropsVectors));
    vectors->v=cells;
    vectors->columns=width;
    vectors->maxRows=UPVEC_INITIAL_ROWS;
    vectors->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP);

    /* zero all initial rows, then fill in their ranges */
    uprv_memset(cells, 0, vectors->rows*width*4);

    /* the all-Unicode row */
    cells[0]=0;
    cells[1]=0x110000;

    /* one row per special value */
    cursor=cells+width;
    for(special=UPVEC_FIRST_SPECIAL_CP; special<=UPVEC_MAX_CP; ++special) {
        cursor[0]=special;
        cursor[1]=special+1;
        cursor+=width;
    }
    return vectors;
}
/* Frees the row storage and the builder itself; a NULL pv is ignored. */
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv) {
    if(pv==NULL) {
        return;
    }
    uprv_free(pv->v);
    uprv_free(pv);
}
/*
 * Returns a pointer to the row whose range [row[0], row[1]) contains rangeStart,
 * and records that row's index in pv->prevRow.
 * First probes the rows at and shortly after pv->prevRow, then the very first row,
 * and otherwise falls back to a binary search over all rows.
 */
static uint32_t *
_findRow(UPropsVectors *pv, UChar32 rangeStart) {
uint32_t *row;
int32_t columns, i, start, limit, prevRow;
columns=pv->columns;
limit=pv->rows;
prevRow=pv->prevRow;
/* check the vicinity of the last-seen row (start searching with an unrolled loop) */
row=pv->v+prevRow*columns;
if(rangeStart>=(UChar32)row[0]) {
if(rangeStart<(UChar32)row[1]) {
/* same row as last seen */
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* next row after the last one */
pv->prevRow=prevRow+1;
return row;
} else if(rangeStart<(UChar32)(row+=columns)[1]) {
/* second row after the last one */
pv->prevRow=prevRow+2;
return row;
} else if((rangeStart-(UChar32)row[1])<10) {
/* we are close, continue looping */
prevRow+=2;
do {
++prevRow;
row+=columns;
} while(rangeStart>=(UChar32)row[1]);
pv->prevRow=prevRow;
return row;
}
} else if(rangeStart<(UChar32)pv->v[1]) {
/* the very first row */
pv->prevRow=0;
return pv->v;
}
/* do a binary search for the start of the range */
/* the searched row index is kept within [start, limit) */
start=0;
while(start<limit-1) {
i=(start+limit)/2;
row=pv->v+i*columns;
if(rangeStart<(UChar32)row[0]) {
limit=i;
} else if(rangeStart<(UChar32)row[1]) {
pv->prevRow=i;
return row;
} else {
start=i;
}
}
/* must be found because all ranges together always cover all of Unicode */
pv->prevRow=start;
return pv->v+start*columns;
}
/*
 * Sets (value & mask) into the masked bits of one value column for all
 * code points start..end (inclusive).
 * Rows that only partially overlap the range are split first; the row
 * storage is reallocated when the extra rows do not fit.
 *
 * Sets *pErrorCode to
 *   U_ILLEGAL_ARGUMENT_ERROR if pv is NULL or start/end/column are out of range,
 *   U_NO_WRITE_PERMISSION if pv was already compacted,
 *   U_INTERNAL_PROGRAM_ERROR if the table would exceed UPVEC_MAX_ROWS,
 *   U_MEMORY_ALLOCATION_ERROR if growing the storage fails.
 * Does nothing if *pErrorCode already indicates a failure.
 */
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode) {
uint32_t *firstRow, *lastRow;
int32_t columns;
UChar32 limit;
UBool splitFirstRow, splitLastRow;
/* argument checking */
if(U_FAILURE(*pErrorCode)) {
return;
}
if( pv==NULL ||
start<0 || start>end || end>UPVEC_MAX_CP ||
column<0 || column>=(pv->columns-2)
) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pv->isCompacted) {
*pErrorCode=U_NO_WRITE_PERMISSION;
return;
}
limit=end+1;
/* initialize */
columns=pv->columns;
column+=2; /* skip range start and limit columns */
value&=mask;
/* find the rows whose ranges overlap with the input range */
/* find the first and last rows, always successful */
firstRow=_findRow(pv, start);
lastRow=_findRow(pv, end);
/*
* Rows need to be split if they partially overlap with the
* input range (only possible for the first and last rows)
* and if their value differs from the input value.
*/
splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
/* split first/last rows if necessary */
if(splitFirstRow || splitLastRow) {
int32_t count, rows;
rows=pv->rows;
if((rows+splitFirstRow+splitLastRow)>pv->maxRows) {
uint32_t *newVectors;
int32_t newMaxRows;
/* grow in two steps: first to UPVEC_MEDIUM_ROWS, then to UPVEC_MAX_ROWS */
if(pv->maxRows<UPVEC_MEDIUM_ROWS) {
newMaxRows=UPVEC_MEDIUM_ROWS;
} else if(pv->maxRows<UPVEC_MAX_ROWS) {
newMaxRows=UPVEC_MAX_ROWS;
} else {
/* Implementation bug, or UPVEC_MAX_ROWS too low. */
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return;
}
newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4);
if(newVectors==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_memcpy(newVectors, pv->v, rows*columns*4);
/* rebase the row pointers onto the new storage before freeing the old one */
firstRow=newVectors+(firstRow-pv->v);
lastRow=newVectors+(lastRow-pv->v);
uprv_free(pv->v);
pv->v=newVectors;
pv->maxRows=newMaxRows;
}
/* count the number of row cells to move after the last row, and move them */
count = (int32_t)((pv->v+rows*columns)-(lastRow+columns));
if(count>0) {
uprv_memmove(
lastRow+(1+splitFirstRow+splitLastRow)*columns,
lastRow+columns,
count*4);
}
pv->rows=rows+splitFirstRow+splitLastRow;
/* split the first row, and move the firstRow pointer to the second part */
if(splitFirstRow) {
/* copy all affected rows up one and move the lastRow pointer */
count = (int32_t)((lastRow-firstRow)+columns);
uprv_memmove(firstRow+columns, firstRow, count*4);
lastRow+=columns;
/* split the range and move the firstRow pointer */
firstRow[1]=firstRow[columns]=(uint32_t)start;
firstRow+=columns;
}
/* split the last row */
if(splitLastRow) {
/* copy the last row data */
uprv_memcpy(lastRow+columns, lastRow, columns*4);
/* split the range: lastRow now ends at limit and its copy starts at limit */
lastRow[1]=lastRow[columns]=(uint32_t)limit;
}
}
/* set the "row last seen" to the last row for the range */
pv->prevRow=(int32_t)((lastRow-(pv->v))/columns);
/* set the input value in all remaining rows */
/* from here on firstRow/lastRow point at the target column cell of their rows */
firstRow+=column;
lastRow+=column;
mask=~mask;
for(;;) {
*firstRow=(*firstRow&mask)|value;
if(firstRow==lastRow) {
break;
}
firstRow+=columns;
}
}
/*
 * Returns the value stored in the given value column for code point c.
 *
 * Returns 0 for any invalid request: pv is NULL, pv was already
 * compacted, c is outside 0..UPVEC_MAX_CP, or column is out of range.
 * The NULL check mirrors the argument validation in upvec_setValue().
 *
 * Note: although pv is const, the lookup updates the builder's
 * "last row seen" search hint via _findRow().
 */
U_CAPI uint32_t U_EXPORT2
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) {
    uint32_t *row;
    UPropsVectors *ncpv;

    if( pv==NULL || pv->isCompacted ||
        c<0 || c>UPVEC_MAX_CP ||
        column<0 || column>=(pv->columns-2)
    ) {
        return 0;
    }
    /* cast away const: _findRow() updates pv->prevRow */
    ncpv=(UPropsVectors *)pv;
    row=_findRow(ncpv, c);
    /* skip the range start and limit cells */
    return row[2+column];
}
/*
 * Returns a pointer to the value cells of row rowIndex and optionally
 * reports the row's code point range.
 *
 * pRangeStart receives the first code point of the range and pRangeEnd
 * the last one (inclusive); either pointer may be NULL.
 *
 * Returns NULL if pv is NULL, pv was already compacted, or rowIndex is
 * outside 0..rows-1. The NULL check mirrors the argument validation in
 * upvec_setValue().
 */
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
             UChar32 *pRangeStart, UChar32 *pRangeEnd) {
    uint32_t *row;
    int32_t columns;

    if(pv==NULL || pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) {
        return NULL;
    }
    columns=pv->columns;
    row=pv->v+rowIndex*columns;
    if(pRangeStart!=NULL) {
        *pRangeStart=(UChar32)row[0];
    }
    if(pRangeEnd!=NULL) {
        /* row[1] is the exclusive limit */
        *pRangeEnd=(UChar32)row[1]-1;
    }
    /* skip the range start and limit cells */
    return row+2;
}
/*
 * Row comparator for uprv_sortArray(); context is the UPropsVectors.
 * Compares the value cells first (from cell 2 on) and then wraps around
 * to the start and limit cells, so that rows are ordered primarily by
 * their values.
 * Returns -1, 0 or 1.
 */
static int32_t U_CALLCONV
upvec_compareRows(const void *context, const void *l, const void *r) {
    const uint32_t *a=(const uint32_t *)l;
    const uint32_t *b=(const uint32_t *)r;
    int32_t columns=((const UPropsVectors *)context)->columns; /* includes start/limit columns */
    int32_t remaining=columns;
    int32_t idx=2; /* start after start/limit but wrap around to them */

    do {
        uint32_t x=a[idx];
        uint32_t y=b[idx];
        if(x<y) {
            return -1;
        }
        if(x>y) {
            return 1;
        }
        ++idx;
        if(idx==columns) {
            idx=0;
        }
    } while(--remaining>0);
    return 0;
}
/*
 * Compact the properties vectors in place and report the rows to the handler.
 *
 * Steps:
 * 1. Mark pv as compacted and sort all rows by their value columns.
 * 2. First pass: compute the compacted index of every row and call the handler
 *    for rows whose start is >=UPVEC_FIRST_SPECIAL_CP (special values only).
 * 3. Call the handler once with UPVEC_START_REAL_VALUES_CP and the total
 *    length of the compacted array.
 * 4. Second pass: move each unique value vector to the front of pv->v and call
 *    the handler for every row whose start is <UPVEC_FIRST_SPECIAL_CP.
 *
 * @param pv         the vectors to compact; does nothing if already compacted
 * @param handler    callback, must not be NULL (else U_ILLEGAL_ARGUMENT_ERROR)
 * @param context    passed through to the handler unchanged
 * @param pErrorCode in/out error code; the function returns immediately on failure
 *
 * Note: isCompacted is set before any work is done, so an error return from
 * sorting or from the handler leaves pv flagged as compacted.
 */
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
uint32_t *row;
int32_t i, columns, valueColumns, rows, count;
UChar32 start, limit;
/* argument checking */
if(U_FAILURE(*pErrorCode)) {
return;
}
if(handler==NULL) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(pv->isCompacted) {
return;
}
/* Set the flag now: Sorting and compacting destroys the builder data structure. */
pv->isCompacted=TRUE;
rows=pv->rows;
columns=pv->columns;
U_ASSERT(columns>=3); /* upvec_open asserts this */
valueColumns=columns-2; /* not counting start & limit */
/* sort the properties vectors to find unique vector values */
uprv_sortArray(pv->v, rows, columns*4,
upvec_compareRows, pv, FALSE, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
/*
 * Find and set the special values.
 * This has to do almost the same work as the compaction below,
 * to find the indexes where the special-value rows will move.
 */
row=pv->v;
/*
 * count is the offset (in uint32_t units) of the current unique vector
 * in the future compacted array; negative means "no vector seen yet".
 */
count=-valueColumns;
for(i=0; i<rows; ++i) {
start=(UChar32)row[0];
/* count a new values vector if it is different from the current one */
/* row-valueColumns is where the previous row's value columns begin (columns==valueColumns+2) */
if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) {
count+=valueColumns;
}
if(start>=UPVEC_FIRST_SPECIAL_CP) {
/* special rows are delivered with start==end */
handler(context, start, start, count, row+2, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
row+=columns;
}
/* count is at the beginning of the last vector, add valueColumns to include that last vector */
count+=valueColumns;
/* Call the handler once more to signal the start of delivering real values. */
/* row now points past the last row, so row-valueColumns is the last row's values (non-NULL) */
handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP,
count, row-valueColumns, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
/*
 * Move vector contents up to a contiguous array with only unique
 * vector values, and call the handler function for each vector.
 *
 * This destroys the Properties Vector structure and replaces it
 * with an array of just vector values.
 */
row=pv->v;
count=-valueColumns;
for(i=0; i<rows; ++i) {
/* fetch these first before memmove() may overwrite them */
start=(UChar32)row[0];
limit=(UChar32)row[1];
/* add a new values vector if it is different from the current one */
/* pv->v+count is the most recently stored unique vector */
if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) {
count+=valueColumns;
uprv_memmove(pv->v+count, row+2, valueColumns*4);
}
if(start<UPVEC_FIRST_SPECIAL_CP) {
/* limit is exclusive in the row; the handler gets an inclusive end */
handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return;
}
}
row+=columns;
}
/* count is at the beginning of the last vector, add one to include that last vector */
pv->rows=count/valueColumns+1;
}
/*
 * Expose the compacted vectors array without copying it.
 * Returns NULL, and leaves the output parameters untouched, if pv has not
 * been compacted yet. pRows and pColumns may be NULL.
 */
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) {
    const uint32_t *vectors=NULL;

    if(pv->isCompacted) {
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            /* report value columns only, not the start/limit pair */
            *pColumns=pv->columns-2;
        }
        vectors=pv->v;
    }
    return vectors;
}
/*
 * Return a newly allocated copy of the compacted vectors array.
 * Sets U_ILLEGAL_ARGUMENT_ERROR if pv has not been compacted and
 * U_MEMORY_ALLOCATION_ERROR if the allocation fails; returns NULL in both
 * cases and when *pErrorCode already indicates a failure.
 * pRows and pColumns may be NULL and are only written on success.
 */
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
                 int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) {
    uint32_t *copy;
    int32_t valueColumns, size;

    if(U_FAILURE(*pErrorCode)) {
        return NULL;
    }
    if(!pv->isCompacted) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    valueColumns=pv->columns-2;
    size=pv->rows*valueColumns*4; /* 4 bytes per uint32_t */
    copy=(uint32_t *)uprv_malloc(size);
    if(copy!=NULL) {
        uprv_memcpy(copy, pv->v, size);
        if(pRows!=NULL) {
            *pRows=pv->rows;
        }
        if(pColumns!=NULL) {
            *pColumns=valueColumns;
        }
    } else {
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
    return copy;
}
/*
 * Compact pv and build a frozen UTrie2 with 16-bit values that holds, for
 * each code point, the index of its row in the compacted array.
 * On failure the trie (if one was created) is closed and NULL is returned.
 */
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) {
    UPVecToUTrie2Context ctx={ NULL };

    /* the handler opens ctx.trie and fills it with row indexes */
    upvec_compact(pv, upvec_compactToUTrie2Handler, &ctx, pErrorCode);
    utrie2_freeze(ctx.trie, UTRIE2_16_VALUE_BITS, pErrorCode);
    if(!U_FAILURE(*pErrorCode)) {
        return ctx.trie;
    }
    utrie2_close(ctx.trie);
    ctx.trie=NULL;
    return ctx.trie;
}
/*
* TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
* some 16-bit field and builds and returns a UTrie2.
*/
/*
 * UPVecCompactHandler implementation that records row indexes in a UTrie2.
 * context must point to a UPVecToUTrie2Context.
 * Special pseudo code points store the initial/error values and, on
 * UPVEC_START_REAL_VALUES_CP, open the trie; real code point ranges are
 * written into the trie with rowIndex as their value.
 */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
                             UChar32 start, UChar32 end,
                             int32_t rowIndex, uint32_t *row, int32_t columns,
                             UErrorCode *pErrorCode) {
    UPVecToUTrie2Context *ctx=(UPVecToUTrie2Context *)context;

    if(start<UPVEC_FIRST_SPECIAL_CP) {
        /* real code point range: map it to its compacted row index */
        utrie2_setRange32(ctx->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode);
        return;
    }

    if(start==UPVEC_INITIAL_VALUE_CP) {
        ctx->initialValue=rowIndex;
    } else if(start==UPVEC_ERROR_VALUE_CP) {
        ctx->errorValue=rowIndex;
    } else if(start==UPVEC_START_REAL_VALUES_CP) {
        ctx->maxValue=rowIndex;
        if(rowIndex>0xffff) {
            /* too many rows for a 16-bit trie */
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        } else {
            ctx->trie=utrie2_open(ctx->initialValue,
                                  ctx->errorValue, pErrorCode);
        }
    }
    /* any other special pseudo code point is ignored */
}

Просмотреть файл

@ -0,0 +1,176 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: propsvec.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002feb22
* created by: Markus W. Scherer
*
* Store bits (Unicode character properties) in bit set vectors.
*/
#ifndef __UPROPSVEC_H__
#define __UPROPSVEC_H__
#include "unicode/utypes.h"
#include "utrie.h"
#include "utrie2.h"
U_CDECL_BEGIN
/**
* Unicode Properties Vectors associated with code point ranges.
*
* Rows of uint32_t integers in a contiguous array store
* the range limits and the properties vectors.
*
* Logically, each row has a certain number of uint32_t values,
* which is set via the upvec_open() "columns" parameter.
*
* Internally, two additional columns are stored.
* In each internal row,
* row[0] contains the start code point and
* row[1] contains the limit code point,
* which is the start of the next range.
*
* Initially, there is only one "normal" row for
* range [0..0x110000[ with values 0.
* There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
*
* It would be possible to store only one range boundary per row,
* but self-contained rows allow to later sort them by contents.
*/
struct UPropsVectors;
typedef struct UPropsVectors UPropsVectors;
/*
* Special pseudo code points for storing the initialValue and the errorValue,
* which are used to initialize a UTrie2 or similar.
*/
#define UPVEC_FIRST_SPECIAL_CP 0x110000
#define UPVEC_INITIAL_VALUE_CP 0x110000
#define UPVEC_ERROR_VALUE_CP 0x110001
#define UPVEC_MAX_CP 0x110001
/*
* Special pseudo code point used in upvec_compact() signalling the end of
* delivering special values and the beginning of delivering real ones.
* Stable value, unlike UPVEC_MAX_CP which might grow over time.
*/
#define UPVEC_START_REAL_VALUES_CP 0x200000
/*
* Open a UPropsVectors object.
* @param columns Number of value integers (uint32_t) per row.
*/
U_CAPI UPropsVectors * U_EXPORT2
upvec_open(int32_t columns, UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
upvec_close(UPropsVectors *pv);
/*
* In rows for code points [start..end], select the column,
* reset the mask bits and set the value bits (ANDed with the mask).
*
* Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
*/
U_CAPI void U_EXPORT2
upvec_setValue(UPropsVectors *pv,
UChar32 start, UChar32 end,
int32_t column,
uint32_t value, uint32_t mask,
UErrorCode *pErrorCode);
/*
* Logically const but must not be used on the same pv concurrently!
* Always returns 0 if called after upvec_compact().
*/
U_CAPI uint32_t U_EXPORT2
upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column);
/*
* pRangeStart and pRangeEnd can be NULL.
* @return NULL if rowIndex out of range and for illegal arguments,
* or if called after upvec_compact()
*/
U_CAPI uint32_t * U_EXPORT2
upvec_getRow(const UPropsVectors *pv, int32_t rowIndex,
UChar32 *pRangeStart, UChar32 *pRangeEnd);
/*
* Compact the vectors:
* - modify the memory
* - keep only unique vectors
* - store them contiguously from the beginning of the memory
* - for each (non-unique) row, call the handler function
*
* The handler's rowIndex is the index of the row in the compacted
* memory block.
* (Therefore, it starts at 0 and increases in increments of the columns value.)
*
* In a first phase, only special values are delivered (each exactly once),
* with start==end both equalling a special pseudo code point.
* Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
* where rowIndex is the length of the compacted array,
* and the row is arbitrary (but not NULL).
* Then, in the second phase, the handler is called for each row of real values.
*/
typedef void U_CALLCONV
UPVecCompactHandler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
U_CAPI void U_EXPORT2
upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode);
/*
* Get the vectors array after calling upvec_compact().
* The caller must not modify nor release the returned array.
* Returns NULL if called before upvec_compact().
*/
U_CAPI const uint32_t * U_EXPORT2
upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns);
/*
* Get a clone of the vectors array after calling upvec_compact().
* The caller owns the returned array and must uprv_free() it.
* Returns NULL if called before upvec_compact().
*/
U_CAPI uint32_t * U_EXPORT2
upvec_cloneArray(const UPropsVectors *pv,
int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode);
/*
* Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
* vectors array, and freeze the trie.
*/
U_CAPI UTrie2 * U_EXPORT2
upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode);
/*
 * Context object for upvec_compactToUTrie2Handler().
 * upvec_compactToUTrie2WithRowIndexes() initializes it with trie==NULL
 * before passing it to upvec_compact().
 */
struct UPVecToUTrie2Context {
/* trie opened by the handler when it receives UPVEC_START_REAL_VALUES_CP */
UTrie2 *trie;
/* rowIndex delivered for UPVEC_INITIAL_VALUE_CP; passed to utrie2_open() */
int32_t initialValue;
/* rowIndex delivered for UPVEC_ERROR_VALUE_CP; passed to utrie2_open() */
int32_t errorValue;
/* rowIndex delivered for UPVEC_START_REAL_VALUES_CP */
int32_t maxValue;
};
typedef struct UPVecToUTrie2Context UPVecToUTrie2Context;
/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
U_CAPI void U_CALLCONV
upvec_compactToUTrie2Handler(void *context,
UChar32 start, UChar32 end,
int32_t rowIndex, uint32_t *row, int32_t columns,
UErrorCode *pErrorCode);
U_CDECL_END
#endif

Просмотреть файл

@ -0,0 +1,587 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: punycode.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan31
* created by: Markus W. Scherer
*/
/* This ICU code derived from: */
/*
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/
Disclaimer and license
Regarding this entire document or any portion of it (including
the pseudocode and C code), the author makes no guarantees and
is not responsible for any damage resulting from its use. The
author grants irrevocable permission to anyone to use, modify,
and distribute it in any way that does not diminish the rights
of anyone else to use, modify, and distribute it, provided that
redistributed derivative works do not contain misleading author or
version information. Derivative works need not be licensed under
similar terms.
*/
/*
* ICU modifications:
* - ICU data types and coding conventions
* - ICU string buffer handling with implicit source lengths
* and destination preflighting
* - UTF-16 handling
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
#include "unicode/ustring.h"
#include "unicode/utf.h"
#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "cstring.h"
#include "cmemory.h"
#include "punycode.h"
#include "uassert.h"
/* Punycode ----------------------------------------------------------------- */
/* Punycode parameters for Bootstring */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 0x80
/* "Basic" Unicode/ASCII code points */
#define _HYPHEN 0X2d
#define DELIMITER _HYPHEN
#define _ZERO_ 0X30
#define _NINE 0x39
#define _SMALL_A 0X61
#define _SMALL_Z 0X7a
#define _CAPITAL_A 0X41
#define _CAPITAL_Z 0X5a
#define IS_BASIC(c) ((c)<0x80)
#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z)
/**
* digitToBasic() returns the basic code point whose value
* (when used for representing integers) is d, which must be in the
* range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
* nonzero, in which case the uppercase form is used.
*/
/*
 * Map a Bootstring digit value to its ASCII character:
 * 0..25 become a..z (or A..Z when uppercase is nonzero),
 * 26..35 become 0..9 regardless of the uppercase flag.
 */
static inline char
digitToBasic(int32_t digit, UBool uppercase) {
    if(digit>=26) {
        /* 26..35 map to ASCII 0..9 */
        return (char)((_ZERO_-26)+digit);
    }
    /* 0..25 map to ASCII a..z or A..Z */
    return uppercase ? (char)(_CAPITAL_A+digit) : (char)(_SMALL_A+digit);
}
/**
* basicToDigit[] contains the numeric value of a basic code
* point (for use in representing integers) in the range 0 to
* BASE-1, or -1 if b does not represent a value.
*/
/* Lookup table indexed by an 8-bit code unit; one row below per 16 entries. */
static const int8_t
basicToDigit[256]={
/* 0x00..0x2f: no digits */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x30..0x3f: '0'..'9' map to 26..35 */
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
/* 0x40..0x5f: 'A'..'Z' map to 0..25 */
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/* 0x60..0x7f: 'a'..'z' map to 0..25 */
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/* 0x80..0xff: no digits */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/*
 * Force an ASCII letter to the requested case.
 * Lowercase letters are uppercased when uppercase is nonzero, uppercase
 * letters are lowercased when it is zero; every other value is returned as is.
 */
static inline char
asciiCaseMap(char b, UBool uppercase) {
    if(uppercase && _SMALL_A<=b && b<=_SMALL_Z) {
        return (char)(b-(_SMALL_A-_CAPITAL_A));
    }
    if(!uppercase && _CAPITAL_A<=b && b<=_CAPITAL_Z) {
        return (char)(b+(_SMALL_A-_CAPITAL_A));
    }
    return b;
}
/* Punycode-specific Bootstring code ---------------------------------------- */
/*
* The following code omits the {parts} of the pseudo-algorithm in the spec
* that are not used with the Punycode parameter set.
*/
/* Bias adaptation function. */
/*
 * Bootstring bias adaptation.
 * delta is scaled down (by DAMP on the first call, by 2 afterwards),
 * increased by delta/length, and then repeatedly divided by BASE-TMIN
 * while it exceeds ((BASE-TMIN)*TMAX)/2; the new bias is derived from the
 * number of divisions and the remaining delta.
 */
static int32_t
adaptBias(int32_t delta, int32_t length, UBool firstTime) {
    int32_t k=0;

    delta/=(firstTime ? DAMP : 2);
    delta+=delta/length;
    while(delta>((BASE-TMIN)*TMAX)/2) {
        delta/=(BASE-TMIN);
        k+=BASE;
    }
    return k+(((BASE-TMIN+1)*delta)/(delta+SKEW));
}
#define MAX_CP_COUNT 200
/*
 * Encode a UTF-16 string as Punycode.
 *
 * Basic (ASCII) code units are copied to dest first, optionally case-mapped
 * according to caseFlags; all other code points are collected in cpBuffer and
 * then encoded as generalized variable-length integers.
 *
 * Output is "preflighted": when dest is too small, nothing is written beyond
 * destCapacity but destLength keeps counting, and u_terminateUChars() reports
 * the final status and length.
 *
 * Errors set in *pErrorCode (return value is 0 in these cases):
 * - U_ILLEGAL_ARGUMENT_ERROR for NULL src, srcLength<-1, or NULL dest with nonzero capacity
 * - U_INDEX_OUTOFBOUNDS_ERROR if src has more than MAX_CP_COUNT code points
 * - U_INVALID_CHAR_FOUND for an unpaired surrogate
 * - U_INTERNAL_PROGRAM_ERROR if delta would overflow
 */
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
const UBool *caseFlags,
UErrorCode *pErrorCode) {
/* one entry per input code point: 0 for basic ones, else the code point with the case flag in bit 31 */
int32_t cpBuffer[MAX_CP_COUNT];
int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
UChar c, c2;
/* argument checking */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/*
 * Handle the basic code points and
 * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
 */
srcCPCount=destLength=0;
if(srcLength==-1) {
/* NUL-terminated input */
for(j=0; /* no condition */; ++j) {
if((c=src[j])==0) {
break;
}
if(srcCPCount==MAX_CP_COUNT) {
/* too many input code points */
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if(IS_BASIC(c)) {
cpBuffer[srcCPCount++]=0;
/* write only while there is room, but always count (preflighting) */
if(destLength<destCapacity) {
dest[destLength]=
caseFlags!=NULL ?
asciiCaseMap((char)c, caseFlags[j]) :
(char)c;
}
++destLength;
} else {
n=(caseFlags!=NULL && caseFlags[j])<<31L;
if(U16_IS_SINGLE(c)) {
n|=c;
/* reading src[j+1] is safe here: c is not the NUL terminator, so index j+1 exists */
} else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
++j;
n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
} else {
/* error: unmatched surrogate */
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
cpBuffer[srcCPCount++]=n;
}
}
} else {
/* length-specified input */
for(j=0; j<srcLength; ++j) {
if(srcCPCount==MAX_CP_COUNT) {
/* too many input code points */
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
c=src[j];
if(IS_BASIC(c)) {
cpBuffer[srcCPCount++]=0;
if(destLength<destCapacity) {
dest[destLength]=
caseFlags!=NULL ?
asciiCaseMap((char)c, caseFlags[j]) :
(char)c;
}
++destLength;
} else {
n=(caseFlags!=NULL && caseFlags[j])<<31L;
if(U16_IS_SINGLE(c)) {
n|=c;
/* the bounds check keeps the trail surrogate lookup inside the input */
} else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
++j;
n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
} else {
/* error: unmatched surrogate */
*pErrorCode=U_INVALID_CHAR_FOUND;
return 0;
}
cpBuffer[srcCPCount++]=n;
}
}
}
/* Finish the basic string - if it is not empty - with a delimiter. */
basicLength=destLength;
if(basicLength>0) {
if(destLength<destCapacity) {
dest[destLength]=DELIMITER;
}
++destLength;
}
/*
 * handledCPCount is the number of code points that have been handled
 * basicLength is the number of basic code points
 * destLength is the number of chars that have been output
 */
/* Initialize the state: */
n=INITIAL_N;
delta=0;
bias=INITIAL_BIAS;
/* Main encoding loop: */
for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
/*
 * All non-basic code points < n have been handled already.
 * Find the next larger one:
 */
for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
if(n<=q && q<m) {
m=q;
}
}
/*
 * Increase delta enough to advance the decoder's
 * <n,i> state to <m,0>, but guard against overflow:
 */
if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return 0;
}
delta+=(m-n)*(handledCPCount+1);
n=m;
/* Encode a sequence of same code points n */
for(j=0; j<srcCPCount; ++j) {
q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
if(q<n) {
++delta;
} else if(q==n) {
/* Represent delta as a generalized variable-length integer: */
for(q=delta, k=BASE; /* no condition */; k+=BASE) {
/** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
t=k-bias;
if(t<TMIN) {
t=TMIN;
} else if(t>TMAX) {
t=TMAX;
}
*/
/* threshold t is k-bias limited to the range TMIN..TMAX */
t=k-bias;
if(t<TMIN) {
t=TMIN;
} else if(k>=(bias+TMAX)) {
t=TMAX;
}
if(q<t) {
break;
}
if(destLength<destCapacity) {
dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0);
}
++destLength;
q=(q-t)/(BASE-t);
}
/* the last digit carries the case flag of the code point (sign bit of cpBuffer[j]) */
if(destLength<destCapacity) {
dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
}
++destLength;
bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
delta=0;
++handledCPCount;
}
}
++delta;
++n;
}
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/*
 * Decode a Punycode string into UTF-16.
 *
 * The code units before the last delimiter are copied to dest as basic
 * (ASCII) code points; the remainder is decoded as a sequence of generalized
 * variable-length integers, each of which inserts one code point into the
 * output.
 *
 * Output is "preflighted": when dest is too small, nothing is written beyond
 * destCapacity but destLength keeps counting, and u_terminateUChars() reports
 * the final status and length.
 *
 * @param src          Punycode input
 * @param srcLength    number of UChars in src, or -1 if NUL-terminated
 * @param dest         output buffer, may be NULL if destCapacity==0
 * @param destCapacity capacity of dest (and of caseFlags) in units
 * @param caseFlags    optional output array of case flags, may be NULL
 * @param pErrorCode   in/out error code:
 *                     U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                     U_INVALID_CHAR_FOUND for a non-ASCII code unit before the
 *                     last delimiter or a non-digit code unit after it,
 *                     U_ILLEGAL_CHAR_FOUND for a truncated or overflowing
 *                     delta sequence or an invalid resulting code point
 * @return the output length in UChars, or 0 on error
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    U_ASSERT(destLength>=0);

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        /* write only while there is room (preflighting) */
        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * Only ASCII code units can be digits. Look at the full 16-bit
             * code unit: truncating it to 8 bits before the table lookup
             * would let a non-ASCII unit such as U+0161 pass as the digit
             * for 'a' (0x61).
             */
            b=src[in++];
            digit=IS_BASIC(b) ? basicToDigit[b] : -1;
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;
            /* threshold t is k-bias limited to the range TMIN..TMAX */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || U_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=U16_LENGTH(n);
        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=U16_LEAD(n);
                dest[codeUnitIndex+1]=U16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        U_ASSERT(destLength>=0);
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */
#endif /* #if !UCONFIG_NO_IDNA */

Просмотреть файл

@ -0,0 +1,118 @@
/*
*******************************************************************************
*
* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: punycode.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2002jan31
* created by: Markus W. Scherer
*/
/* This ICU code derived from: */
/*
punycode.c 0.4.0 (2001-Nov-17-Sat)
http://www.cs.berkeley.edu/~amc/idn/
Adam M. Costello
http://www.nicemice.net/amc/
*/
#ifndef __PUNYCODE_H__
#define __PUNYCODE_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_IDNA
/**
* u_strToPunycode() converts Unicode to Punycode.
*
* The input string must not contain single, unpaired surrogates.
* The output will be represented as an array of ASCII code points.
*
* The output string is NUL-terminated according to normal ICU
* string output rules.
*
* @param src Input Unicode string.
* This function handles a limited amount of code points
* (the limit is >=64).
* U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
* @param srcLength Number of UChars in src, or -1 if NUL-terminated.
* @param dest Output Punycode array.
* @param destCapacity Size of dest.
* @param caseFlags Vector of boolean values, one per input UChar,
* indicating that the corresponding character is to be
* marked for the decoder optionally
* uppercasing (TRUE) or lowercasing (FALSE)
* the character.
* ASCII characters are output directly in the case as marked.
* Flags corresponding to trail surrogates are ignored.
* If caseFlags==NULL then input characters are not
* case-mapped.
* @param pErrorCode ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if src contains
* unmatched single surrogates.
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
* too many code points.
* @return Number of ASCII characters in puny.
*
* @see u_strFromPunycode
*/
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
const UBool *caseFlags,
UErrorCode *pErrorCode);
/**
* u_strFromPunycode() converts Punycode to Unicode.
* The Unicode string will be at most as long (in UChars)
* as the Punycode string (in chars).
*
* @param src Input Punycode string.
* @param srcLength Length of puny, or -1 if NUL-terminated
* @param dest Output Unicode string buffer.
* @param destCapacity Size of dest in number of UChars,
* and of caseFlags in numbers of UBools.
* @param caseFlags Output array for case flags as
* defined by the Punycode string.
* The caller should uppercase (TRUE) or lowercase (FALSE)
* the corresponding character in dest.
* For supplementary characters, only the lead surrogate
* is marked, and FALSE is stored for the trail surrogate.
* This is redundant and not necessary for ASCII characters
* because they are already in the case indicated.
* Can be NULL if the case flags are not needed.
* @param pErrorCode ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if a non-ASCII character
* precedes the last delimiter ('-'),
* or if an invalid character (not a-zA-Z0-9) is found
* after the last delimiter.
* U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
* @return Number of UChars written to dest.
*
* @see u_strToPunycode
*/
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
UChar *dest, int32_t destCapacity,
UBool *caseFlags,
UErrorCode *pErrorCode);
#endif /* #if !UCONFIG_NO_IDNA */
#endif
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,570 @@
/*
******************************************************************************
*
* Copyright (C) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
* FILE NAME : putilimp.h
*
* Date Name Description
* 10/17/04 grhoten Move internal functions from putil.h to this file.
******************************************************************************
*/
#ifndef PUTILIMP_H
#define PUTILIMP_H
#include "unicode/utypes.h"
#include "unicode/putil.h"
/**
* \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
* Nearly all CPUs and compilers implement a right-shift of a signed integer
* as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB))
* into the vacated bits (sign extension).
* For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1.
*
* This can be useful for storing a signed value in the upper bits
* and another bit field in the lower bits.
* The signed value can be retrieved by simple right-shifting.
*
* This is consistent with the Java language.
*
* However, the C standard allows compilers to implement a right-shift of a signed integer
* as a Logical Shift Right which copies a 0 into the vacated bits.
* For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff.
*
* Code that depends on the natural behavior should be guarded with this macro,
* with an alternate path for unusual platforms.
* @internal
*/
#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC
/* Use the predefined value. */
#else
/*
* Nearly all CPUs & compilers implement a right-shift of a signed integer
* as an Arithmetic Shift Right (with sign extension).
*/
# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1
#endif
/** Define this to 1 if your platform supports IEEE 754 floating point,
to 0 if it does not. */
#ifndef IEEE_754
# define IEEE_754 1
#endif
/**
* uintptr_t is an optional part of the standard definitions in stdint.h.
* The opengroup.org documentation for stdint.h says
* "On XSI-conformant systems, the intptr_t and uintptr_t types are required;
* otherwise, they are optional."
* We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well.
*
* Do not use ptrdiff_t since it is signed. size_t is unsigned.
*/
/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */
#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390)
typedef size_t uintptr_t;
#endif
/**
* \def U_HAVE_MSVC_2003_OR_EARLIER
* Flag for workaround of MSVC 2003 optimization bugs
* @internal
*/
#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
#define U_HAVE_MSVC_2003_OR_EARLIER
#endif
/*===========================================================================*/
/** @{ Information about POSIX support */
/*===========================================================================*/
#ifdef U_HAVE_NL_LANGINFO_CODESET
/* Use the predefined value. */
#elif U_PLATFORM_HAS_WIN32_API
# define U_HAVE_NL_LANGINFO_CODESET 0
#else
# define U_HAVE_NL_LANGINFO_CODESET 1
#endif
#ifdef U_NL_LANGINFO_CODESET
/* Use the predefined value. */
#elif !U_HAVE_NL_LANGINFO_CODESET
# define U_NL_LANGINFO_CODESET -1
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#else
# define U_NL_LANGINFO_CODESET CODESET
#endif
/**
 * \def U_TZSET
 * Identifier selected per platform: _tzset when only the Win32 API is used,
 * left undefined on OS/400, tzset everywhere else.
 * A value predefined by the build is used as is.
 * @internal
 */
#ifdef U_TZSET
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TZSET _tzset
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#else
# define U_TZSET tzset
#endif
/**
 * \def U_TIMEZONE
 * Identifier selected per platform: timezone on Android, __timezone on other
 * Linux-based platforms, _timezone when only the Win32 API is used, left
 * undefined on BSD (except NetBSD) and on OS/400, timezone everywhere else.
 * The Android test must stay ahead of the Linux-based test so that it wins
 * if both conditions hold.
 * A value predefined by the build is used as is.
 * @internal
 */
#ifdef U_TIMEZONE
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID
# define U_TIMEZONE timezone
#elif U_PLATFORM_IS_LINUX_BASED
# define U_TIMEZONE __timezone
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TIMEZONE _timezone
#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
/* not defined */
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#else
# define U_TIMEZONE timezone
#endif
/**
 * \def U_TZNAME
 * Identifier selected per platform: _tzname when only the Win32 API is used,
 * left undefined on OS/400, tzname everywhere else.
 * A value predefined by the build is used as is.
 * @internal
 */
#ifdef U_TZNAME
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TZNAME _tzname
#elif U_PLATFORM == U_PF_OS400
/* not defined */
#else
# define U_TZNAME tzname
#endif
/**
 * \def U_HAVE_MMAP
 * Defaults to 0 on platforms that have the Win32 API and to 1 on all others.
 * A value predefined by the build is used as is.
 * NOTE(review): presumably states whether mmap() is available - confirm.
 * @internal
 */
#ifdef U_HAVE_MMAP
/* Use the predefined value. */
#elif U_PLATFORM_HAS_WIN32_API
# define U_HAVE_MMAP 0
#else
# define U_HAVE_MMAP 1
#endif
/**
 * \def U_HAVE_POPEN
 * Defaults to 0 when only the Win32 API is used and on OS/400, and to 1 on
 * all other platforms. A value predefined by the build is used as is.
 * NOTE(review): presumably states whether popen() is available - confirm.
 * @internal
 */
#ifdef U_HAVE_POPEN
/* Use the predefined value. */
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_HAVE_POPEN 0
#elif U_PLATFORM == U_PF_OS400
# define U_HAVE_POPEN 0
#else
# define U_HAVE_POPEN 1
#endif
/**
 * \def U_HAVE_DIRENT_H
 * Defines whether dirent.h is available.
 * Defaults to 0 on platforms that have the Win32 API and to 1 on all others.
 * @internal
 */
#ifdef U_HAVE_DIRENT_H
/* Use the predefined value. */
#elif U_PLATFORM_HAS_WIN32_API
# define U_HAVE_DIRENT_H 0
#else
# define U_HAVE_DIRENT_H 1
#endif
/** @} */
/*===========================================================================*/
/** @{ GCC built in functions for atomic memory operations */
/*===========================================================================*/
/**
 * \def U_HAVE_GCC_ATOMICS
 * Defaults to 1 when U_GCC_MAJOR_MINOR is at least 404 and to 0 otherwise.
 * A value predefined by the build is used as is.
 * NOTE(review): 404 presumably encodes GCC 4.4 - confirm against the definition
 * of U_GCC_MAJOR_MINOR.
 * @internal
 */
#ifdef U_HAVE_GCC_ATOMICS
/* Use the predefined value. */
#elif U_GCC_MAJOR_MINOR >= 404
# define U_HAVE_GCC_ATOMICS 1
#else
# define U_HAVE_GCC_ATOMICS 0
#endif
/** @} */
/*===========================================================================*/
/** @{ Code alignment */
/*===========================================================================*/
/**
 * \def U_ALIGN_CODE
 * This is used to align code fragments to a specific byte boundary.
 * This is useful for getting consistent performance test results.
 * Expands to an "__asm align" statement when _MSC_VER and _M_IX86 are defined
 * and _MANAGED is not; expands to nothing in every other configuration.
 * @param boundarySize the alignment passed through to the align directive
 * @internal
 */
#ifdef U_ALIGN_CODE
/* Use the predefined value. */
#elif defined(_MSC_VER) && defined(_M_IX86) && !defined(_MANAGED)
# define U_ALIGN_CODE(boundarySize) __asm align boundarySize
#else
# define U_ALIGN_CODE(boundarySize)
#endif
/** @} */
/*===========================================================================*/
/** @{ Programs used by ICU code */
/*===========================================================================*/
/**
 * \def U_MAKE_IS_NMAKE
 * Defines whether the "make" program is Windows nmake.
 * Defaults to 1 when U_PLATFORM is U_PF_WINDOWS and to 0 otherwise.
 * A value predefined by the build is used as is.
 */
#ifdef U_MAKE_IS_NMAKE
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_WINDOWS
# define U_MAKE_IS_NMAKE 1
#else
# define U_MAKE_IS_NMAKE 0
#endif
/** @} */
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
/**
 * Platform utilities isolates the platform dependencies of the
 * library. For each platform which this code is ported to, these
 * functions may have to be re-implemented.
 */
/**
 * Floating point utility to determine if a double is Not a Number (NaN).
 * @param d the value to test
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d);
/**
 * Floating point utility to determine if a double has an infinite value.
 * @param d the value to test
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d);
/**
 * Floating point utility to determine if a double has a positive infinite value.
 * @param d the value to test
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d);
/**
 * Floating point utility to determine if a double has a negative infinite value.
 * @param d the value to test
 * @internal
 */
U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d);
/**
 * Floating point utility that returns a Not a Number (NaN) value.
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_getNaN(void);
/**
 * Floating point utility that returns an infinite value.
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_getInfinity(void);
/**
 * Floating point utility to truncate a double.
 * @param d the value to truncate
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_trunc(double d);
/**
 * Floating point utility to calculate the floor of a double.
 * @param d the value whose floor is wanted
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_floor(double d);
/**
 * Floating point utility to calculate the ceiling of a double.
 * @param d the value whose ceiling is wanted
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_ceil(double d);
/**
 * Floating point utility to calculate the absolute value of a double.
 * @param d the value whose absolute value is wanted
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_fabs(double d);
/**
 * Floating point utility to calculate the fractional and integer parts of a double.
 * @param d the value to split
 * @param pinteger output pointer; presumably receives the integer part, as with
 *                 the C library modf() - confirm against the implementation
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger);
/**
 * Floating point utility to calculate the remainder of a double divided by another double.
 * @param d the dividend
 * @param y the divisor
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y);
/**
 * Floating point utility to calculate d to the power of exponent (d^exponent).
 * @param d the base
 * @param exponent the exponent
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent);
/**
 * Floating point utility to calculate 10 to the power of exponent (10^exponent).
 * @param exponent the exponent
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent);
/**
 * Floating point utility to calculate the maximum value of two doubles.
 * @param d the first value
 * @param y the second value
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y);
/**
 * Floating point utility to calculate the minimum value of two doubles.
 * @param d the first value
 * @param y the second value
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y);
/**
 * Private utility to calculate the maximum value of two integers.
 * @param d the first value
 * @param y the second value
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y);
/**
 * Private utility to calculate the minimum value of two integers.
 * @param d the first value
 * @param y the second value
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y);
/**
 * Sign test that inspects a single byte of the argument's storage, read as a
 * signed char: the first byte when U_IS_BIG_ENDIAN is set, the last byte
 * otherwise. The argument must be an lvalue because its address is taken.
 * @internal
 */
#if U_IS_BIG_ENDIAN
# define uprv_isNegative(number) (*((signed char *)&(number))<0)
#else
# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
#endif
/**
 * Return the largest positive number that can be represented by an integer
 * type of arbitrary bit length.
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void);
/**
 * Floating point utility to calculate the logarithm of a double.
 * @param d the value whose logarithm is wanted
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_log(double d);
/**
 * Does common notion of rounding e.g. uprv_floor(x + 0.5);
 * @param x the double number
 * @return the rounded double
 * @internal
 */
U_INTERNAL double U_EXPORT2 uprv_round(double x);
/* The declaration below is disabled twice over: by "#if 0" and by being commented out. */
#if 0
/**
 * Returns the number of digits after the decimal point in a double number x.
 *
 * @param x the double number
 * @return the number of digits after the decimal point in a double number x.
 * @internal
 */
/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/
#endif
/* The default-codepage query is only declared when the charset is not fixed to UTF-8. */
#if !U_CHARSET_IS_UTF8
/**
 * Please use ucnv_getDefaultName() instead.
 * Return the default codepage for this platform and locale.
 * This function can call setlocale() on Unix platforms. Please read the
 * platform documentation on setlocale() before calling this function.
 * @return the default codepage for this platform
 * @internal
 */
U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
#endif
/**
 * Please use uloc_getDefault() instead.
 * Return the default locale ID string by querying the system, or
 * zero if one cannot be found.
 * This function can call setlocale() on Unix platforms. Please read the
 * platform documentation on setlocale() before calling this function.
 * @return the default locale ID string
 * @internal
 */
U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
/**
 * Time zone utilities
 *
 * Wrappers for C runtime library functions relating to timezones.
 * The uprv_tzset() function (similar to tzset) uses the current setting
 * of the environment variable TZ to assign values to three global
 * variables: daylight, timezone, and tzname. These variables have the
 * following meanings, and are declared in &lt;time.h&gt;.
 *
 * daylight Nonzero if daylight-saving-time zone (DST) is specified
 * in TZ; otherwise, 0. Default value is 1.
 * timezone Difference in seconds between coordinated universal
 * time and local time. E.g., -28,800 for PST (GMT-8hrs)
 * tzname(0) Three-letter time-zone name derived from TZ environment
 * variable. E.g., "PST".
 * tzname(1) Three-letter DST zone name derived from TZ environment
 * variable. E.g., "PDT". If DST zone is omitted from TZ,
 * tzname(1) is an empty string.
 *
 * Notes: For example, to set the TZ environment variable to correspond
 * to the current time zone in Germany, you can use one of the
 * following statements:
 *
 * set TZ=GST1GDT
 * set TZ=GST+1GDT
 *
 * If the TZ value is not set, uprv_tzset() attempts to use the time zone
 * information specified by the operating system. Under Windows NT
 * and Windows 95, this information is specified in the Control Panel's
 * Date/Time application.
 * @internal
 */
U_INTERNAL void U_EXPORT2 uprv_tzset(void);
/**
 * Difference in seconds between coordinated universal
 * time and local time. E.g., -28,800 for PST (GMT-8hrs)
 * @return the difference in seconds between coordinated universal time and local time.
 * @internal
 */
U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void);
/**
 * tzname(0) Three-letter time-zone name derived from TZ environment
 * variable. E.g., "PST".
 * tzname(1) Three-letter DST zone name derived from TZ environment
 * variable. E.g., "PDT". If DST zone is omitted from TZ,
 * tzname(1) is an empty string.
 * @param n selects the name: 0 for the standard zone name, 1 for the DST zone name
 * @internal
 */
U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n);
/**
 * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
 * This function is affected by 'faketime' and should be the bottleneck for all
 * user-visible ICU time functions.
 * @return the UTC time measured in milliseconds
 * @internal
 */
U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void);
/**
 * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970.
 * This function is not affected by 'faketime', so it should only be used by
 * low-level test functions, not by anything that exposes time to the end user.
 * @return the UTC time measured in milliseconds
 * @internal
 */
U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void);
/**
 * Determine whether a pathname is absolute or not, as defined by the platform.
 * @param path Pathname to test
 * @return TRUE if the path is absolute
 * @internal (ICU 3.0)
 */
U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path);
/**
 * Use U_MAX_PTR instead of this function.
 * @param base pointer to test
 * @return the largest possible pointer greater than the base
 * @internal (ICU 3.8)
 */
U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
/**
 * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer.
 * In fact, buffer sizes must not exceed 2GB so that the difference between
 * the buffer limit and the buffer start can be expressed in an int32_t.
 *
 * The definition of U_MAX_PTR must fulfill the following conditions:
 * - return the largest possible pointer greater than base
 * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
 * - avoid wrapping around at high addresses
 * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes
 *
 * A definition supplied by the build takes precedence over all variants below.
 *
 * @param base The beginning of a buffer to find the maximum offset from
 * @internal
 */
#ifndef U_MAX_PTR
# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
/* We have 31-bit pointers. */
# define U_MAX_PTR(base) ((void *)0x7fffffff)
# elif U_PLATFORM == U_PF_OS400
/* Delegates to the uprv_maximumPtr() function. */
# define U_MAX_PTR(base) uprv_maximumPtr((void *)base)
# elif 0
/*
 * Disabled variant (the "elif 0" never selects it), kept for reference.
 *
 * For platforms where pointers are scalar values (which is normal, but unlike i5/OS)
 * but that do not define uintptr_t.
 *
 * However, this does not work on modern compilers:
 * The C++ standard does not define pointer overflow, and allows compilers to
 * assume that p+u>p for any pointer p and any integer u>0.
 * Thus, modern compilers optimize away the ">" comparison.
 * (See ICU tickets #7187 and #8096.)
 */
# define U_MAX_PTR(base) \
((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \
? ((char *)(base)+0x7fffffffu) \
: (char *)-1))
# else
/*
 * Default version. C++ standard compliant for scalar pointers.
 * The arithmetic is done on uintptr_t: if base+0x7fffffff wraps around,
 * the result is clamped to (uintptr_t)-1 instead.
 */
# define U_MAX_PTR(base) \
((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \
? ((uintptr_t)(base)+0x7fffffffu) \
: (uintptr_t)-1))
# endif
#endif
/* Dynamic Library Functions */
/* Generic function type used for symbols returned by uprv_dlsym_func(). */
typedef void (UVoidFunction)(void);
/* The loader entry points are only declared when U_ENABLE_DYLOAD is nonzero. */
#if U_ENABLE_DYLOAD
/**
 * Load a library
 * @param libName name of the library to load
 * @param status ICU error code
 * @internal (ICU 4.4)
 */
U_INTERNAL void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status);
/**
 * Close a library
 * @param lib library handle, presumably one returned by uprv_dl_open() - confirm
 * @param status ICU error code
 * @internal (ICU 4.4)
 */
U_INTERNAL void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status);
/**
 * Extract a symbol from a library (function)
 * @param lib library handle
 * @param symbolName name of the symbol to look up
 * @param status ICU error code
 * @internal (ICU 4.8)
 */
U_INTERNAL UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status);
/**
 * Extract a symbol from a library (data)
 * Not implemented, no clients.
 * @internal
 */
/* U_INTERNAL void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */
#endif
/**
 * Define malloc and related functions.
 * On OS/400 the uprv_default_* macros map to the _C_TS_* allocation
 * functions; on every other platform they map to the C library's
 * malloc/realloc/free.
 * @internal
 */
#if U_PLATFORM == U_PF_OS400
# define uprv_default_malloc(x) _C_TS_malloc(x)
# define uprv_default_realloc(x,y) _C_TS_realloc(x,y)
# define uprv_default_free(x) _C_TS_free(x)
/* also _C_TS_calloc(x) */
#else
/* C defaults */
# define uprv_default_malloc(x) malloc(x)
# define uprv_default_realloc(x,y) realloc(x,y)
# define uprv_default_free(x) free(x)
#endif
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

453
intl/icu/source/common/rbbicst.pl Executable file
Просмотреть файл

@ -0,0 +1,453 @@
#**************************************************************************
# Copyright (C) 2002-2005 International Business Machines Corporation *
# and others. All rights reserved. *
#**************************************************************************
#
# rbbicst Compile the RBBI rule parser state table data into initialized C data.
# Usage:
# cd icu/source/common
# perl rbbicst.pl < rbbirpt.txt > rbbirpt.h
# perl rbbicst.pl -j < rbbirpt.txt > RBBIRuleParseTable.java
#
# The output file, rbbirpt.h, is included by some of the .cpp rbbi
# implementation files. This perl script is NOT run as part
# of a normal ICU build. It is run by hand when needed, and the
# rbbirpt.h generated file is put back into cvs.
#
# See rbbirpt.txt for a description of the input format for this script.
#
# Command line: a leading "-j" switches the generator from C++ to Java output.
# The switch is consumed so that <> below only sees input file names / STDIN.
if ($ARGV[0] eq "-j") {
    shift @ARGV;
    $javaOutput = 1;
}

# Running counters used while reading the state table description.
$line_num   = 0;    # The line number in the input file.
$num_states = 1;    # Always the state number for the line being compiled.

# "pop" is pre-registered as a state name. This keeps any state from being
# labelled "pop" and lets references to "pop" in the next-state field resolve.
$states{"pop"} = 255;
#
# Read the state table description, one input line per loop iteration.
# Each transition line that is accepted fills in the parallel arrays
#   @state_line_num, @state_literal_chars / @state_char_class, @state_flag,
#   @state_dest_state, @state_push_state and @state_func_name
# at index $num_states, and then advances $num_states.
#
line_loop: while (<>) {
    chomp();
    $line = $_;
    @fields = split();
    $line_num++;

    # Remove # comments, which are any fields beginning with a #, plus all
    # that follow on the line.
    for ($i=0; $i<@fields; $i++) {
        if ($fields[$i] =~ /^#/) {
            @fields = @fields[0 .. $i-1];
            last;
        }
    }

    # Ignore blank lines, and those with no fields left after stripping comments.
    if (@fields == 0) {
        next;
    }

    #
    # State Label: handling.
    # Does the first token end with a ":"? If so, it's the name of a state.
    # Put in a hash, together with the current state number,
    # so that we can later look up the number from the name.
    #
    if ($fields[0] =~ /.*:$/) {
        $state_name = $fields[0];
        $state_name =~ s/://;    # strip off the colon from the state name.
        if ($states{$state_name} != 0) {
            # Report the input line number ($line_num), not the line text.
            print " rbbicst: at line $line_num duplicate definition of state $state_name\n";
        }
        $states{$state_name} = $num_states;
        $stateNames[$num_states] = $state_name;

        # If the label was the only thing on this line, go on to the next line,
        # otherwise assume that a state definition is on the same line and fall through.
        if (@fields == 1) {
            next line_loop;
        }
        shift @fields;    # shift off label field in preparation
                          # for handling the rest of the line.
    }

    #
    # State Transition line.
    # syntax is this,
    #     character [n] target-state [^push-state] [function-name]
    # where
    #     [something] is an optional something
    #     character is either a single quoted character e.g. '['
    #     or a name of a character class, e.g. white_space
    #
    $state_line_num[$num_states] = $line_num;    # remember line number with each state
                                                 # so we can make better error messages later.

    #
    # First field, character class or literal character for this transition.
    #
    if ($fields[0] =~ /^'.'$/) {
        # We've got a quoted literal character.
        $state_literal_chars[$num_states] = $fields[0];
        $state_literal_chars[$num_states] =~ s/'//g;
    } else {
        # We've got the name of a character class.
        $state_char_class[$num_states] = $fields[0];
        if ($fields[0] =~ /[\W]/) {
            print " rbbicsts: at line $line_num, bad character literal or character class name.\n";
            print " scanning $fields[0]\n";
            exit(-1);
        }
    }
    shift @fields;

    #
    # do the 'n' flag
    #
    $state_flag[$num_states] = $javaOutput? "false" : "FALSE";
    if ($fields[0] eq "n") {
        $state_flag[$num_states] = $javaOutput? "true": "TRUE";
        shift @fields;
    }

    #
    # do the destination state.
    #
    $state_dest_state[$num_states] = $fields[0];
    if ($fields[0] eq "") {
        print " rbbicsts: at line $line_num, destination state missing.\n";
        exit(-1);
    }
    shift @fields;

    #
    # do the push state, if present.
    #
    if ($fields[0] =~ /^\^/) {
        $fields[0] =~ s/^\^//;
        $state_push_state[$num_states] = $fields[0];
        if ($fields[0] eq "" ) {
            print " rbbicsts: at line $line_num, expected state after ^ (no spaces).\n";
            exit(-1);
        }
        shift @fields;
    }

    #
    # Lastly, do the optional action name.
    #
    if ($fields[0] ne "") {
        $state_func_name[$num_states] = $fields[0];
        shift @fields;
    }

    #
    # There should be no fields left on the line at this point.
    # (This is reported but does not stop the run.)
    #
    if (@fields > 0) {
        print " rbbicsts: at line $line_num, unexpected extra stuff on input line.\n";
        print " scanning $fields[0]\n";
    }
    $num_states++;
}
#
# We've read in the whole file, now go back and output the
# C source code for the state transition table.
#
# We read all states first, before writing anything, so that the state numbers
# for the destination states are all available to be written.
#
#
# Make hashes for the names of the character classes and
# for the names of the actions that appeared.
#
# Walk every compiled state row and record, as hash keys,
#   - each character class name that was used   (%charClasses)
#   - each action (function) name that was used (%actions)
# Rows without an explicit action get the default action "doNOP".
for ($state=1; $state < $num_states; $state++) {
    if ($state_char_class[$state] ne "") {
        if ($charClasses{$state_char_class[$state]} == 0) {
            $charClasses{$state_char_class[$state]} = 1;
        }
    }
    if ($state_func_name[$state] eq "") {
        $state_func_name[$state] = "doNOP";
    }
    # Test the same array that is stored into; the action names live in
    # @state_func_name (there is no @state_action_name).
    if ($actions{$state_func_name[$state]} == 0) {
        $actions{$state_func_name[$state]} = 1;
    }
}
#
# Check that all of the destination states have been defined
#
#
# "exit" is a predefined state name that terminates the state machine. It maps
# to state number 0, so it has to be exempted from the "== 0 means undefined"
# test applied to destination states below.
$states{"exit"} = 0;

# Every destination state and every push state named by a row must have been
# defined by a label somewhere in the input. All problems are reported before
# the script gives up.
for ($state = 1; $state < $num_states; $state++) {
    my $dest  = $state_dest_state[$state];
    my $push  = $state_push_state[$state];
    my $where = $state_line_num[$state];

    if ($dest ne "exit" && $states{$dest} == 0) {
        print "Error at line ${where}: target state \"${dest}\" is not defined.\n";
        $errors++;
    }
    if ($push ne "" && $states{$push} == 0) {
        print "Error at line ${where}: target state \"${push}\" is not defined.\n";
        $errors++;
    }
}
if ($errors > 0) {
    die;
}
#
# Assign numbers to each of the character classes classes used.
# Sets are numbered from 128 - 250
# The values 0-127 in the state table are used for matching
# individual ASCII characters (the only thing that can appear in the rules.)
# The "set" names appearing in the code below (default, etc.) need special
# handling because they do not correspond to a normal set of characters,
# but trigger special handling by code in the state machine.
#
# Class names that trigger special handling in the state machine carry fixed
# numbers at the top of the range; every other class is numbered upwards from
# 128 in sorted-name order.
my %fixedClassNumber = (
    "default"  => 255,
    "escaped"  => 254,
    "escapedP" => 253,
    "eof"      => 252,
);
$i = 128;
foreach $setName (sort keys %charClasses) {
    if (exists $fixedClassNumber{$setName}) {
        $charClasses{$setName} = $fixedClassNumber{$setName};
        next;
    }
    # Normal (single) character class. Number them.
    $charClasses{$setName} = $i;
    $i++;
}
#
# Output generation. Everything below writes the generated source file to
# standard output: the Java class when -j was given, the C++ header otherwise.
# Only $year (current year, used in the emitted copyright line) is used from
# the localtime result.
#
my ($sec, $min, $hour, , $day, $mon, $year, $wday, $yday, $isdst) = localtime;
$year += 1900;
if ($javaOutput) {
#
# Java Output ...
#
print "/*\n";
print " *******************************************************************************\n";
print " * Copyright (C) 2003-$year,\n";
print " * International Business Machines Corporation and others. All Rights Reserved.\n";
print " *******************************************************************************\n";
print " */\n";
print " \n";
print "package com.ibm.icu.text;\n";
print " \n";
print "/**\n";
print " * Generated Java File. Do not edit by hand.\n";
print " * This file contains the state table for the ICU Rule Based Break Iterator\n";
print " * rule parser.\n";
print " * It is generated by the Perl script \"rbbicst.pl\" from\n";
print " * the rule parser state definitions file \"rbbirpt.txt\".\n";
print " * \@internal \n";
print " *\n";
print " */\n";
print "class RBBIRuleParseTable\n";
print "{\n";
#
# Emit the constants for the actions to be performed.
# Actions are numbered from 1 in sorted-name order.
#
$n = 1;
foreach $act (sort keys %actions) {
print " static final short $act = $n;\n";
$n++;
}
print " \n";
#
# Emit constants for char class names
#
foreach $setName (sort keys %charClasses) {
print " static final short kRuleSet_$setName = $charClasses{$setName};\n";
}
print "\n\n";
# Emit the Java class that represents one row of the transition table.
print " static class RBBIRuleTableElement { \n";
print " short fAction; \n";
print " short fCharClass; \n";
print " short fNextState; \n";
print " short fPushState; \n";
print " boolean fNextChar; \n";
print " String fStateName; \n";
print " RBBIRuleTableElement(short a, int cc, int ns, int ps, boolean nc, String sn) { \n";
print " fAction = a; \n";
print " fCharClass = (short)cc; \n";
print " fNextState = (short)ns; \n";
print " fPushState = (short)ps; \n";
print " fNextChar = nc; \n";
print " fStateName = sn; \n";
print " } \n";
print " }; \n";
print " \n";
# Emit the state transition table, one element per compiled row.
print " static RBBIRuleTableElement[] gRuleParseStateTable = { \n ";
print " new RBBIRuleTableElement(doNOP, 0, 0,0, true, null ) // 0 \n"; #output the unused state 0.
for ($state=1; $state < $num_states; $state++) {
print " , new RBBIRuleTableElement($state_func_name[$state],";
# A row matches either a literal character or a character class.
if ($state_literal_chars[$state] ne "") {
$c = $state_literal_chars[$state];
print("'$c', ");
}else {
print " $charClasses{$state_char_class[$state]},";
}
print " $states{$state_dest_state[$state]},";
# The push-state field is optional. If omitted, fill field with a zero, which flags
# the state machine that there is no push state.
if ($state_push_state[$state] eq "") {
print "0, ";
} else {
print " $states{$state_push_state[$state]},";
}
print " $state_flag[$state], ";
# if this is the first row of the table for this state, put out the state name.
if ($stateNames[$state] ne "") {
print " \"$stateNames[$state]\") ";
} else {
print " null ) ";
}
# Put out a comment showing the number (index) of this state row,
print " // $state ";
print "\n";
}
print " };\n";
print "}; \n";
}
else
{
#
# C++ Output ...
#
print "//---------------------------------------------------------------------------------\n";
print "//\n";
print "// Generated Header File. Do not edit by hand.\n";
print "// This file contains the state table for the ICU Rule Based Break Iterator\n";
print "// rule parser.\n";
print "// It is generated by the Perl script \"rbbicst.pl\" from\n";
print "// the rule parser state definitions file \"rbbirpt.txt\".\n";
print "//\n";
print "// Copyright (C) 2002-$year International Business Machines Corporation \n";
print "// and others. All rights reserved. \n";
print "//\n";
print "//---------------------------------------------------------------------------------\n";
print "#ifndef RBBIRPT_H\n";
print "#define RBBIRPT_H\n";
print "\n";
print "U_NAMESPACE_BEGIN\n";
#
# Emit the constants for indices of Unicode Sets
# Define one constant for each of the character classes encountered.
# Only classes numbered below 250 get a constant; the specially handled
# classes (numbered 252-255 earlier in this script) are skipped.
#
print "//\n";
print "// Character classes for RBBI rule scanning.\n";
print "//\n";
foreach $setName (sort keys %charClasses) {
if ($charClasses{$setName} < 250) {
# Normal character class.
print " static const uint8_t kRuleSet_$setName = $charClasses{$setName};\n";
}
}
print "\n\n";
#
# Emit the enum for the actions to be performed.
#
print "enum RBBI_RuleParseAction {\n";
foreach $act (sort keys %actions) {
print " $act,\n";
}
print " rbbiLastAction};\n\n";
#
# Emit the struct definition for transition table elements.
#
print "//-------------------------------------------------------------------------------\n";
print "//\n";
print "// RBBIRuleTableEl represents the structure of a row in the transition table\n";
print "// for the rule parser state machine.\n";
print "//-------------------------------------------------------------------------------\n";
print "struct RBBIRuleTableEl {\n";
print " RBBI_RuleParseAction fAction;\n";
print " uint8_t fCharClass; // 0-127: an individual ASCII character\n";
print " // 128-255: character class index\n";
print " uint8_t fNextState; // 0-250: normal next-stat numbers\n";
print " // 255: pop next-state from stack.\n";
print " uint8_t fPushState;\n";
print " UBool fNextChar;\n";
print "};\n\n";
#
# emit the state transition table
#
print "static const struct RBBIRuleTableEl gRuleParseStateTable[] = {\n";
print " {doNOP, 0, 0, 0, TRUE}\n"; # State 0 is a dummy. Real states start with index = 1.
for ($state=1; $state < $num_states; $state++) {
print " , {$state_func_name[$state],";
# A row matches either a literal character or a character class.
if ($state_literal_chars[$state] ne "") {
$c = $state_literal_chars[$state];
printf(" %d /* $c */,", ord($c)); # use numeric value, so EBCDIC machines are ok.
}else {
print " $charClasses{$state_char_class[$state]},";
}
print " $states{$state_dest_state[$state]},";
# The push-state field is optional. If omitted, fill field with a zero, which flags
# the state machine that there is no push state.
if ($state_push_state[$state] eq "") {
print "0, ";
} else {
print " $states{$state_push_state[$state]},";
}
print " $state_flag[$state]} ";
# Put out a C++ comment showing the number (index) of this state row,
# and, if this is the first row of the table for this state, the state name.
print " // $state ";
if ($stateNames[$state] ne "") {
print " $stateNames[$state]";
}
print "\n";
};
print " };\n";
#
# emit a mapping array from state numbers to state names.
#
# This array is used for producing debugging output from the rule parser.
# Rows that do not start a named state get a 0 entry.
#
print "#ifdef RBBI_DEBUG\n";
print "static const char * const RBBIRuleStateNames[] = {";
for ($state=0; $state<$num_states; $state++) {
if ($stateNames[$state] ne "") {
print " \"$stateNames[$state]\",\n";
} else {
print " 0,\n";
}
}
print " 0};\n";
print "#endif\n\n";
print "U_NAMESPACE_END\n";
print "#endif\n";
}

Просмотреть файл

@ -0,0 +1,446 @@
/*
***************************************************************************
* Copyright (C) 1999-2010 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/utypes.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "utrie.h"
#include "udatamem.h"
#include "cmemory.h"
#include "cstring.h"
#include "umutex.h"
#include "uassert.h"
//-----------------------------------------------------------------------------------
//
// Trie access folding function. Copied as-is from properties code in uchar.c
//
//-----------------------------------------------------------------------------------
U_CDECL_BEGIN
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    // Bit 15 of the 16-bit trie result flags the presence of a folding offset;
    // when it is set the offset is carried in bits 14..0, otherwise there is none.
    return (data & 0x8000) != 0 ? (int32_t)(data & 0x7fff) : 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN
//-----------------------------------------------------------------------------
//
// Constructors.
//
//-----------------------------------------------------------------------------
// Wrap a raw data header. All of the set-up work is done by init().
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) {
    init(data, status);
}

// Wrap a raw data header without taking ownership of it: after init() has run,
// the wrapper is flagged so that the destructor leaves the header memory alone.
RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) {
    init(data, status);
    fDontFreeData = TRUE;
}

// Wrap data that lives inside a UDataMemory. The wrapper remembers the
// UDataMemory so that the destructor can close it.
RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
    // The break iterator data starts headerSize bytes past the start of the
    // udata header; using headerSize (rather than info.size) takes into
    // consideration the padding added in by udata_write.
    const char *headerStart = (const char *)(udm->pHeader);
    const RBBIDataHeader *d =
        (const RBBIDataHeader *)(headerStart + udm->pHeader->dataHeader.headerSize);
    init(d, status);
    fUDataMem = udm;
}
//-----------------------------------------------------------------------------
//
// init(). Does most of the work of construction, shared between the
// constructors.
//
//-----------------------------------------------------------------------------
void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
    // Put every member that the destructor or a caller may look at into a
    // known, inert state before anything can fail. Without this, both early
    // returns below leave fUDataMem, fDontFreeData, fRefCount and the table
    // pointers indeterminate, and the destructor would act on garbage.
    // fDontFreeData starts out TRUE so that a wrapper whose data was rejected
    // never frees memory it did not successfully adopt.
    fHeader          = NULL;
    fForwardTable    = NULL;
    fReverseTable    = NULL;
    fSafeFwdTable    = NULL;
    fSafeRevTable    = NULL;
    fRuleSource      = NULL;
    fRuleStatusTable = NULL;
    fStatusMaxIdx    = 0;
    fUDataMem        = NULL;
    fRefCount        = 0;
    fDontFreeData    = TRUE;

    if (U_FAILURE(status)) {
        return;
    }

    fHeader = data;
    if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
    {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }
    // Note: in ICU version 3.2 and earlier, there was a formatVersion 1
    //       that is no longer supported. At that time fFormatVersion was
    //       an int32_t field, rather than an array of 4 bytes.

    // The data has been validated; from here on the wrapper owns it unless a
    // constructor overrides this after init() returns.
    fDontFreeData = FALSE;

    // Each state table is optional; a length of zero leaves its pointer NULL.
    // The table offsets in the header are relative to the start of the header.
    if (data->fFTableLen != 0) {
        fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable);
    }
    if (data->fRTableLen != 0) {
        fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable);
    }
    if (data->fSFTableLen != 0) {
        fSafeFwdTable = (RBBIStateTable *)((char *)data + fHeader->fSFTable);
    }
    if (data->fSRTableLen != 0) {
        fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
    }

    utrie_unserialize(&fTrie,
                      (uint8_t *)data + fHeader->fTrie,
                      fHeader->fTrieLen,
                      &status);
    if (U_FAILURE(status)) {
        return;
    }
    fTrie.getFoldingOffset=getFoldingOffset;

    // The rule source is aliased, not copied: fRuleString refers to the
    // NUL-terminated text inside the data.
    fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);
    U_ASSERT(data->fRuleSourceLen > 0);

    fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
    fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t);

    // The creator of the wrapper holds the first reference.
    fRefCount = 1;

#ifdef RBBI_DEBUG
    char *debugEnv = getenv("U_RBBIDEBUG");
    if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();}
#endif
}
//-----------------------------------------------------------------------------
//
// Destructor. Don't call this - use removeReference() instead.
//
//-----------------------------------------------------------------------------
RBBIDataWrapper::~RBBIDataWrapper() {
    U_ASSERT(fRefCount == 0);

    // Data that came from a UDataMemory is released by closing that object.
    if (fUDataMem) {
        udata_close(fUDataMem);
        return;
    }

    // Otherwise the header is freed here, unless the wrapper was told not to
    // free the data.
    if (fDontFreeData) {
        return;
    }
    uprv_free((void *)fHeader);
}
//-----------------------------------------------------------------------------
//
// Operator == Consider two RBBIDataWrappers to be equal if they
// refer to the same underlying data. Although
// the data wrappers are normally shared between
// iterator instances, it's possible to independently
// open the same data twice, and get two instances, which
// should still be ==.
//
//-----------------------------------------------------------------------------
UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
    const RBBIDataHeader *mine   = fHeader;
    const RBBIDataHeader *theirs = other.fHeader;

    // Same underlying data block: trivially equal.
    if (mine == theirs) {
        return TRUE;
    }

    // Different blocks are equal only when they have the same length and the
    // same bytes over that whole length.
    if (mine->fLength == theirs->fLength &&
            uprv_memcmp(mine, theirs, mine->fLength) == 0) {
        return TRUE;
    }
    return FALSE;
}
// The hash is simply the length of the forward state table as recorded in the
// data header.
int32_t RBBIDataWrapper::hashCode() {
    const RBBIDataHeader *header = fHeader;
    return header->fFTableLen;
}
//-----------------------------------------------------------------------------
//
// Reference Counting. A single RBBIDataWrapper object is shared among
// however many RulesBasedBreakIterator instances are
// referencing the same data.
//
//-----------------------------------------------------------------------------
// Drop one reference; the wrapper deletes itself when the decremented count
// reaches zero.
void RBBIDataWrapper::removeReference() {
    if (umtx_atomic_dec(&fRefCount) != 0) {
        return;
    }
    delete this;
}
// Take one more reference and hand back this wrapper, so that the call can be
// used directly in an assignment.
RBBIDataWrapper *RBBIDataWrapper::addReference() {
    RBBIDataWrapper *self = this;
    umtx_atomic_inc(&self->fRefCount);
    return self;
}
//-----------------------------------------------------------------------------
//
// getRuleSourceString
//
//-----------------------------------------------------------------------------
// Returns a reference to the fRuleString member; no copy is made.
const UnicodeString &RBBIDataWrapper::getRuleSourceString() const {
    const UnicodeString &rules = fRuleString;
    return rules;
}
//-----------------------------------------------------------------------------
//
// print - debugging function to dump the runtime data tables.
//
//-----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Debug dump of one state table: a heading, a column header with one column
// per character category, then one line per state row. A NULL table is
// reported as such after the headers have been printed.
void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) {
    uint32_t cat;
    uint32_t state;

    RBBIDebugPrintf(" %s\n", heading);

    // Column header: fixed columns followed by one number per category.
    RBBIDebugPrintf("State | Acc LA TagIx");
    cat = 0;
    while (cat < fHeader->fCatCount) {
        RBBIDebugPrintf("%3d ", cat);
        cat++;
    }

    // Separator line, widened by four dashes per category.
    RBBIDebugPrintf("\n------|---------------");
    cat = 0;
    while (cat < fHeader->fCatCount) {
        RBBIDebugPrintf("----");
        cat++;
    }
    RBBIDebugPrintf("\n");

    if (table == NULL) {
        RBBIDebugPrintf(" N U L L T A B L E\n\n");
        return;
    }

    // Rows are fRowLen bytes apart, starting at fTableData.
    state = 0;
    while (state < table->fNumStates) {
        RBBIStateTableRow *row = (RBBIStateTableRow *)
            (table->fTableData + (table->fRowLen * state));
        RBBIDebugPrintf("%4d | %3d %3d %3d ", state, row->fAccepting, row->fLookAhead, row->fTagIdx);
        cat = 0;
        while (cat < fHeader->fCatCount) {
            RBBIDebugPrintf("%3d ", row->fNextState[cat]);
            cat++;
        }
        RBBIDebugPrintf("\n");
        state++;
    }
    RBBIDebugPrintf("\n");
}
#endif
#ifdef RBBI_DEBUG
void RBBIDataWrapper::printData() {
RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader);
RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1],
fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]);
RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength);
RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount);
printTable("Forward State Transition Table", fForwardTable);
printTable("Reverse State Transition Table", fReverseTable);
printTable("Safe Forward State Transition Table", fSafeFwdTable);
printTable("Safe Reverse State Transition Table", fSafeRevTable);
RBBIDebugPrintf("\nOrignal Rules source:\n");
for (int32_t c=0; fRuleSource[c] != 0; c++) {
RBBIDebugPrintf("%c", fRuleSource[c]);
}
RBBIDebugPrintf("\n\n");
}
#endif
U_NAMESPACE_END
U_NAMESPACE_USE
//-----------------------------------------------------------------------------
//
// ubrk_swap - byte swap and char encoding swap of RBBI data
//
//-----------------------------------------------------------------------------
// Byte-swap (and char-encoding swap) a block of RBBI break data.
//
//   ds       Swapper describing the input/output endianness and charset.
//   inData   Start of the input: generic ICU data header followed by RBBI data.
//   length   Number of input bytes, or -1 for a preflight that only
//            computes and returns the total size.
//   outData  Destination buffer; may be the same memory as inData (in-place).
//   status   In/out ICU error code.  Nothing is done if it already indicates
//            a failure on entry.
//
// Returns the total size in bytes (ICU data header + RBBI data), or 0 with
// *status set on error.
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
          UErrorCode *status) {

    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    //
    //  Check that the data header is for break data.
    //    (Header contents are defined in genbrk.cpp)
    //
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
    if(!(  pInfo->dataFormat[0]==0x42 &&   /* dataFormat="Brk " */
           pInfo->dataFormat[1]==0x72 &&
           pInfo->dataFormat[2]==0x6b &&
           pInfo->dataFormat[3]==0x20 &&
           pInfo->formatVersion[0]==3  )) {
        udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Swap the data header.  (This is the generic ICU Data Header, not the RBBI Specific
    //                         RBBIDataHeader).  This swap also conveniently gets us
    //                         the size of the ICU d.h., which lets us locate the start
    //                         of the RBBI specific data.
    //
    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
    if (U_FAILURE(*status)) {
        // The generic header could not be swapped, so headerSize cannot be
        // trusted to locate the RBBI data.  Stop here and keep the error
        // code reported by udata_swapDataHeader() instead of overwriting it.
        return 0;
    }

    //
    // Get the RBBI Data Header, and check that it appears to be OK.
    //
    //    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
    //           an int32_t with a value of 1.  Starting with ICU 3.4,
    //           RBBI's fDataFormat matches the dataFormat field from the
    //           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
    //
    const uint8_t  *inBytes =(const uint8_t *)inData+headerSize;
    RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
    if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
        rbbiDH->fFormatVersion[0] != 3 ||
        ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader))
    {
        udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
        *status=U_UNSUPPORTED_ERROR;
        return 0;
    }

    //
    // Preflight operation?  Just return the size
    //
    int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength);
    int32_t totalSize = headerSize + breakDataLength;
    if (length < 0) {
        return totalSize;
    }

    //
    // Check that length passed in is consistent with length from RBBI data header.
    //
    if (length < totalSize) {
        udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n",
                         breakDataLength);
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    //
    // Swap the Data.  Do the data itself first, then the RBBI Data Header, because
    //                 we need to reference the header to locate the data, and an
    //                 inplace swap of the header leaves it unusable.
    //
    uint8_t         *outBytes = (uint8_t *)outData + headerSize;
    RBBIDataHeader  *outputDH = (RBBIDataHeader *)outBytes;

    int32_t   tableStartOffset;
    int32_t   tableLength;

    //
    // If not swapping in place, zero out the output buffer before starting.
    //    Individual tables and other data items within are aligned to 8 byte boundaries
    //    when originally created.  Any unused space between items needs to be zero.
    //
    if (inBytes != outBytes) {
        uprv_memset(outBytes, 0, breakDataLength);
    }

    //
    // Each state table begins with several 32 bit fields.  Calculate the size
    //   in bytes of these.
    //
    int32_t         topSize = offsetof(RBBIStateTable, fTableData);

    // Forward state table.
    //   The leading 32 bit fields are swapped as int32, the rows as int16.
    tableStartOffset = ds->readUInt32(rbbiDH->fFTable);
    tableLength      = ds->readUInt32(rbbiDH->fFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fRTable);
    tableLength      = ds->readUInt32(rbbiDH->fRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Safe Forward state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSFTable);
    tableLength      = ds->readUInt32(rbbiDH->fSFTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Safe Reverse state table.  Same layout as forward table, above.
    tableStartOffset = ds->readUInt32(rbbiDH->fSRTable);
    tableLength      = ds->readUInt32(rbbiDH->fSRTableLen);

    if (tableLength > 0) {
        ds->swapArray32(ds, inBytes+tableStartOffset, topSize,
                            outBytes+tableStartOffset, status);
        ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize,
                            outBytes+tableStartOffset+topSize, status);
    }

    // Trie table for character categories
    utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
                   outBytes+ds->readUInt32(rbbiDH->fTrie), status);

    // Source Rules Text.  It's UChar data
    ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
                        outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);

    // Table of rule status values.  It's all int32_t values
    ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
                        outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);

    // And, last, the header.
    //   It is all int32_t values except for fFormatVersion, which is an array of four bytes.
    //   Swap the whole thing as int32_t, then re-swap the one field.
    //
    ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
    ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status);

    return totalSize;
}
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

Просмотреть файл

@ -0,0 +1,198 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2011 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: rbbidata.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* RBBI data formats Includes
*
* Structs that describe the format of the Binary RBBI data,
* as it is stored in ICU's data file.
*
* RBBIDataWrapper - Instances of this class sit between the
* raw data structs and the RulesBasedBreakIterator objects
* that are created by applications. The wrapper class
* provides reference counting for the underlying data,
* and direct pointers to data that would not otherwise
* be accessible without ugly pointer arithmetic. The
* wrapper does not attempt to provide any higher level
* abstractions for the data itself.
*
* There will be only one instance of RBBIDataWrapper for any
* set of RBBI run time data being shared by instances
* (clones) of RulesBasedBreakIterator.
*/
#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__
#include "unicode/utypes.h"
#include "unicode/udata.h"
#include "udataswp.h"
/**
* Swap RBBI data. See udataswp.h.
* @internal
*/
U_CAPI int32_t U_EXPORT2
ubrk_swap(const UDataSwapper *ds,
const void *inData, int32_t length, void *outData,
UErrorCode *pErrorCode);
#ifdef __cplusplus
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "utrie.h"
U_NAMESPACE_BEGIN
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
/*
 * Header at the start of a block of binary RBBI data.
 * It identifies the data (magic number, format version, total length) and
 * records the offset and size of every subsection that follows it.
 * The field order and sizes are part of the stored data format.
 */
struct RBBIDataHeader {
uint32_t fMagic; /* == 0xb1a0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */
/* */
/* Offsets and sizes of each of the subsections within the RBBI data. */
/* All offsets are bytes from the start of the RBBIDataHeader. */
/* All sizes are in bytes. */
/* */
uint32_t fFTable; /* Offset to the forward state transition table. */
uint32_t fFTableLen; /* Size of the forward state transition table. */
uint32_t fRTable; /* Offset to the reverse state transition table. */
uint32_t fRTableLen; /* Size of the reverse state transition table. */
uint32_t fSFTable; /* Offset to the safe point forward transition table. */
uint32_t fSFTableLen; /* Size of the safe point forward transition table. */
uint32_t fSRTable; /* Offset to the safe point reverse transition table. */
uint32_t fSRTableLen; /* Size of the safe point reverse transition table. */
uint32_t fTrie; /* Offset to Trie data for character categories */
uint32_t fTrieLen; /* Size of the Trie data. */
uint32_t fRuleSource; /* Offset to the source for the break */
uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
uint32_t fStatusTable; /* Offset to the table of rule status values */
uint32_t fStatusTableLen; /* Size of the table of rule status values. */
uint32_t fReserved[6]; /* Reserved for expansion */
};
/*
 * One row of a state transition table: the per-state flags followed by a
 * variable-length array of next states, one entry per character category.
 */
struct RBBIStateTableRow {
int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
/* Value 0: not an accepting state. */
/* -1: Unconditional Accepting state. */
/* positive: Look-ahead match has completed. */
/* Actual boundary position happened earlier */
/* Value here == fLookAhead in earlier */
/* state, at actual boundary pos. */
int16_t fLookAhead; /* Non-zero if this row is for a state that */
/* corresponds to a '/' in the rule source. */
/* Value is the same as the fAccepting */
/* value for the rule (which will appear */
/* in a different state). */
int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
/* from a rule. Value is the index in the */
/* StatusTable of the set of matching */
/* tags (rule status values) */
int16_t fReserved; /* Reserved; not described further here. */
uint16_t fNextState[2]; /* Next State, indexed by char category. */
/* This array does not have two elements */
/* Array Size is actually fData->fHeader->fCatCount */
/* CAUTION: see RBBITableBuilder::getTableSize() */
/* before changing anything here. */
};
/*
 * A complete state transition table: four 32 bit header fields followed by
 * fNumStates rows of fRowLen bytes each, starting at fTableData.
 * Row n is located at fTableData + n * fRowLen.
 */
struct RBBIStateTable {
uint32_t fNumStates; /* Number of states. */
uint32_t fRowLen; /* Length of a state table row, in bytes. */
uint32_t fFlags; /* Option Flags for this state table */
/* (presumably RBBIStateTableFlags values - confirm) */
uint32_t fReserved; /* reserved */
char fTableData[4]; /* First RBBIStateTableRow begins here. */
/* (making it char[] simplifies ugly address */
/* arithmetic for indexing variable length rows.) */
};
/*
 * Option flag bits for a state table.
 * NOTE(review): presumably these are the values stored in
 * RBBIStateTable::fFlags; this header does not show where they are
 * set or tested - confirm against the table builder and the runtime engine.
 */
typedef enum {
RBBI_LOOKAHEAD_HARD_BREAK = 1,
RBBI_BOF_REQUIRED = 2
} RBBIStateTableFlags;
/* */
/* The reference counting wrapper class */
/* */
/*
 * Reference counted wrapper around one block of binary RBBI data.
 * It exposes direct pointers to the sections inside the data and deletes
 * itself when removeReference() brings the reference count to zero.
 */
class RBBIDataWrapper : public UMemory {
public:
/* Selector for the constructor overload that takes an EDontAdopt argument. */
enum EDontAdopt {
kDontAdopt
};
/* Construct from a raw data header. */
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
/* Construct from a raw data header, with the EDontAdopt selector. */
/* NOTE(review): presumably leaves ownership of the data with the caller */
/* (see fDontFreeData) - confirm in rbbidata.cpp. */
RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
/* Construct from a UDataMemory handle. */
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
/* Closes fUDataMem if set; otherwise frees fHeader unless fDontFreeData. */
~RBBIDataWrapper();
void init(const RBBIDataHeader *data, UErrorCode &status);
/* Atomically increments the reference count; returns this. */
RBBIDataWrapper *addReference();
/* Atomically decrements the reference count; deletes this at zero. */
void removeReference();
/* TRUE if both wrappers refer to the same header or to byte-identical data. */
UBool operator ==(const RBBIDataWrapper &other) const;
/* Returns the header's fFTableLen value. */
int32_t hashCode();
/* Returns fRuleString. */
const UnicodeString &getRuleSourceString() const;
/* Debug dump functions; they compile away to nothing unless RBBI_DEBUG is defined. */
#ifdef RBBI_DEBUG
void printData();
void printTable(const char *heading, const RBBIStateTable *table);
#else
#define printData()
#define printTable(heading, table)
#endif
/* */
/* Pointers to items within the data */
/* */
const RBBIDataHeader *fHeader;
const RBBIStateTable *fForwardTable;
const RBBIStateTable *fReverseTable;
const RBBIStateTable *fSafeFwdTable;
const RBBIStateTable *fSafeRevTable;
const UChar *fRuleSource;
const int32_t *fRuleStatusTable;
/* number of int32_t values in the rule status table. Used to sanity check indexing */
int32_t fStatusMaxIdx;
UTrie fTrie;
private:
int32_t fRefCount; /* Number of outstanding references. */
UDataMemory *fUDataMem; /* If non-NULL, closed by the destructor. */
UnicodeString fRuleString; /* Returned by getRuleSourceString(). */
UBool fDontFreeData; /* If set, the destructor does not free fHeader. */
RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
};
U_NAMESPACE_END
#endif /* C++ */
#endif

Просмотреть файл

@ -0,0 +1,358 @@
/*
***************************************************************************
* Copyright (C) 2002-2008 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
*/
//
// File: rbbinode.cpp
//
// Implementation of class RBBINode, which represents a node in the
// tree generated when parsing the Rules Based Break Iterator rules.
//
// This "Class" is actually closer to a struct.
// Code using it is expected to directly access fields much of the time.
//
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/parsepos.h"
#include "uvector.h"
#include "rbbirb.h"
#include "rbbinode.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
#ifdef RBBI_DEBUG
static int gLastSerial = 0;
#endif
//-------------------------------------------------------------------------
//
// Constructor. Just set the fields to reasonable default values.
//
//-------------------------------------------------------------------------
// Construct an unlinked node of type t.  Every field gets a neutral default;
// only the operator node types that take part in precedence handling get a
// non-zero precedence.
RBBINode::RBBINode(NodeType t) : UMemory() {
#ifdef RBBI_DEBUG
    fSerialNum    = ++gLastSerial;
#endif
    fType         = t;

    // Tree links and the input set start out empty.
    fParent       = NULL;
    fLeftChild    = NULL;
    fRightChild   = NULL;
    fInputSet     = NULL;

    fFirstPos     = 0;
    fLastPos      = 0;
    fVal          = 0;
    fNullable     = FALSE;
    fLookAheadEnd = FALSE;

    switch (t) {
    case opCat:    fPrecedence = precOpCat;  break;
    case opOr:     fPrecedence = precOpOr;   break;
    case opStart:  fPrecedence = precStart;  break;
    case opLParen: fPrecedence = precLParen; break;
    default:       fPrecedence = precZero;   break;
    }

    UErrorCode     status = U_ZERO_ERROR;
    fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
    fLastPosSet   = new UVector(status);
    fFollowPos    = new UVector(status);
}
// Copy constructor.  Produces an unlinked copy of a single node:
//   - the scalar fields are copied from 'other';
//   - parent and child links are NOT copied (the new node starts detached);
//   - the fInputSet pointer is copied as-is, not cloned;
//   - the three position-set vectors are created fresh and empty.
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
#ifdef RBBI_DEBUG
    fSerialNum   = ++gLastSerial;
#endif
    fType        = other.fType;
    fParent      = NULL;
    fLeftChild   = NULL;
    fRightChild  = NULL;
    fInputSet    = other.fInputSet;
    fPrecedence  = other.fPrecedence;
    fText        = other.fText;
    fFirstPos    = other.fFirstPos;
    fLastPos     = other.fLastPos;
    fNullable    = other.fNullable;
    // fLookAheadEnd was previously left uninitialized in copies, so any
    // later read of it on a cloned node yielded an indeterminate value.
    fLookAheadEnd = other.fLookAheadEnd;
    fVal         = other.fVal;
    UErrorCode     status = U_ZERO_ERROR;
    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
    fLastPosSet  = new UVector(status);
    fFollowPos   = new UVector(status);
}
//-------------------------------------------------------------------------
//
// Destructor. Deletes both this node AND any child nodes,
// except in the case of variable reference nodes. For
// these, the l. child points back to the definition, which
// is common for all references to the variable, meaning
// it can't be deleted here.
//
//-------------------------------------------------------------------------
// Destructor.  Deletes the input set, the child subtrees (except for varRef
// and setRef nodes, whose children are shared and owned elsewhere), and the
// three position-set vectors.
RBBINode::~RBBINode() {
    // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
    delete fInputSet;
    fInputSet = NULL;

    // varRef / setRef: multiple instances point to the same "children";
    // storage ownership of those is handled elsewhere, so skip them here.
    const UBool ownsChildren = (fType != varRef && fType != setRef);
    if (ownsChildren) {
        delete fLeftChild;
        fLeftChild  = NULL;
        delete fRightChild;
        fRightChild = NULL;
    }

    delete fFirstPosSet;
    delete fLastPosSet;
    delete fFollowPos;
}
//-------------------------------------------------------------------------
//
// cloneTree Make a copy of the subtree rooted at this node.
// Discard any variable references encountered along the way,
// and replace with copies of the variable's definitions.
// Used to replicate the expression underneath variable
// references in preparation for generating the DFA tables.
//
//-------------------------------------------------------------------------
// Make a copy of the subtree rooted at this node.
//   - varRef nodes are skipped: the variable's definition is cloned instead.
//   - uset nodes are not copied: the node itself is returned (shared).
//   - every other node is copy-constructed and its children cloned recursively.
//
// Returns the root of the copy, or NULL if a node could not be allocated.
// On failure the partially built copy is deleted before returning, so the
// caller never receives a truncated tree.
RBBINode *RBBINode::cloneTree() {
    if (fType == RBBINode::varRef) {
        // If the current node is a variable reference, skip over it
        //   and clone the definition of the variable instead.
        return fLeftChild->cloneTree();
    }
    if (fType == RBBINode::uset) {
        return this;
    }

    RBBINode *n = new RBBINode(*this);
    // Check for null pointer.
    if (n == NULL) {
        return NULL;
    }

    if (fLeftChild != NULL) {
        n->fLeftChild = fLeftChild->cloneTree();
        if (n->fLeftChild == NULL) {
            // Cloning the child failed.  Previously the NULL result was
            // dereferenced here.  The copy constructor copied our fInputSet
            // pointer; clear it so that deleting the partial copy does not
            // free a set this node still refers to.
            n->fInputSet = NULL;
            delete n;
            return NULL;
        }
        n->fLeftChild->fParent = n;
    }
    if (fRightChild != NULL) {
        n->fRightChild = fRightChild->cloneTree();
        if (n->fRightChild == NULL) {
            // Same failure handling as for the left child.
            n->fInputSet = NULL;
            delete n;
            return NULL;
        }
        n->fRightChild->fParent = n;
    }
    return n;
}
//-------------------------------------------------------------------------
//
// flattenVariables Walk a parse tree, replacing any variable
// references with a copy of the variable's definition.
// Aside from variables, the tree is not changed.
//
// Return the root of the tree. If the root was not a variable
// reference, it remains unchanged - the root we started with
// is the root we return. If, however, the root was a variable
// reference, the root of the newly cloned replacement tree will
// be returned, and the original tree deleted.
//
// This function works by recursively walking the tree
// without doing anything until a variable reference is
// found, then calling cloneTree() at that point. Any
// nested references are handled by cloneTree(), not here.
//
//-------------------------------------------------------------------------
// Replace variable references in this subtree with copies of the variables'
// definitions.  Returns the (possibly new) root of the subtree.
// A varRef root is deleted and the clone of its definition is returned in
// its place; any other root is returned unchanged after its children have
// been processed.
// NOTE: cloneTree() can return NULL when a node cannot be allocated; that
//       result is not checked here.
RBBINode *RBBINode::flattenVariables() {
    if (fType != varRef) {
        // Ordinary node: recurse, re-attaching whatever each child turns into.
        if (fLeftChild != NULL) {
            RBBINode *flattened = fLeftChild->flattenVariables();
            fLeftChild = flattened;
            fLeftChild->fParent = this;
        }
        if (fRightChild != NULL) {
            RBBINode *flattened = fRightChild->flattenVariables();
            fRightChild = flattened;
            fRightChild->fParent = this;
        }
        return this;
    }

    // Variable reference: substitute a copy of the definition for this node.
    RBBINode *replacement = fLeftChild->cloneTree();
    delete this;
    return replacement;
}
//-------------------------------------------------------------------------
//
// flattenSets Walk the parse tree, replacing any nodes of type setRef
// with a copy of the expression tree for the set. A set's
// equivalent expression tree is precomputed and saved as
// the left child of the uset node.
//
//-------------------------------------------------------------------------
void RBBINode::flattenSets() {
U_ASSERT(fType != setRef);
if (fLeftChild != NULL) {
if (fLeftChild->fType==setRef) {
RBBINode *setRefNode = fLeftChild;
RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild;
fLeftChild = replTree->cloneTree();
fLeftChild->fParent = this;
delete setRefNode;
} else {
fLeftChild->flattenSets();
}
}
if (fRightChild != NULL) {
if (fRightChild->fType==setRef) {
RBBINode *setRefNode = fRightChild;
RBBINode *usetNode = setRefNode->fLeftChild;
RBBINode *replTree = usetNode->fLeftChild;
fRightChild = replTree->cloneTree();
fRightChild->fParent = this;
delete setRefNode;
} else {
fRightChild->flattenSets();
}
}
}
//-------------------------------------------------------------------------
//
// findNodes() Locate all the nodes of the specified type, starting
// at the specified root.
//
//-------------------------------------------------------------------------
// Append to 'dest' every node of type 'kind' in the subtree rooted here,
// visiting this node first, then the left subtree, then the right subtree.
// Does nothing if 'status' already indicates a failure on entry.
void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (kind == fType) {
        dest->addElement(this, status);
    }
    // Each recursive call re-checks status on entry.
    RBBINode *children[2] = { fLeftChild, fRightChild };
    for (int32_t k = 0; k < 2; ++k) {
        if (children[k] != NULL) {
            children[k]->findNodes(dest, kind, status);
        }
    }
}
//-------------------------------------------------------------------------
//
// print. Print out a single node, for debugging.
//
//-------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Debug print of a single node on one line: address, type name, parent and
// child addresses, serial number, first position and value.  For varRef
// nodes the node text is appended.  A NULL 'this' prints just the pointer.
void RBBINode::printNode() {
    // Names in the same order as the NodeType enum; indexed by fType.
    static const char * const nodeTypeNames[] = {
                "setRef",
                "uset",
                "varRef",
                "leafChar",
                "lookAhead",
                "tag",
                "endMark",
                "opStart",
                "opCat",
                "opOr",
                "opStar",
                "opPlus",
                "opQuestion",
                "opBreak",
                "opReverse",
                "opLParen"
    };

    if (this != NULL) {
        RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ",
            (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild,
            fSerialNum, fFirstPos, fVal);
        if (fType == varRef) {
            RBBI_DEBUG_printUnicodeString(fText);
        }
    } else {
        RBBIDebugPrintf("%10p", (void *)this);
    }
    RBBIDebugPrintf("\n");
}
#endif
#ifdef RBBI_DEBUG
// Debug print of a UnicodeString, one code unit at a time with "%c",
// followed by spaces to pad the output out to minWidth columns.
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth)
{
    const int len = s.length();
    int       pos = 0;

    while (pos < len) {
        RBBIDebugPrintf("%c", s.charAt(pos));
        // putc(s.charAt(pos), stdout);
        ++pos;
    }
    // Pad on the right; no padding when the string is already wide enough.
    for (int pad = len; pad < minWidth; ++pad) {
        RBBIDebugPrintf(" ");
    }
}
#endif
//-------------------------------------------------------------------------
//
// print. Print out the tree of nodes rooted at "this"
//
//-------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Debug print of the tree rooted at this node, one node per line in
// pre-order.  A column heading is printed first when printHeading is set.
// The definition underneath a variable reference is not dumped.
void RBBINode::printTree(UBool printHeading) {
    if (printHeading) {
        RBBIDebugPrintf( "-------------------------------------------------------------------\n"
                         " Address type Parent LeftChild RightChild serial position value\n"
                         );
    }
    this->printNode();

    // Nothing below a NULL node, and varRef children are deliberately skipped.
    if (this == NULL || fType == varRef) {
        return;
    }
    // Unconditionally dump children of all other node types.
    if (fLeftChild != NULL) {
        fLeftChild->printTree(FALSE);
    }
    if (fRightChild != NULL) {
        fRightChild->printTree(FALSE);
    }
}
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

Просмотреть файл

@ -0,0 +1,118 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2001-2006, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#ifndef RBBINODE_H
#define RBBINODE_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
//
// class RBBINode
//
// Represents a node in the parse tree generated when reading
// a rule file.
//
U_NAMESPACE_BEGIN
class UnicodeSet;
class UVector;
// A node in the parse tree built from break rules.  The class is used much
// like a struct: the data members are public and accessed directly.
class RBBINode : public UMemory {
public:
// Kind of node.  The debug function printNode() keeps a table of names
// in this same order, indexed by these values.
enum NodeType {
setRef,
uset,
varRef,
leafChar,
lookAhead,
tag,
endMark,
opStart,
opCat,
opOr,
opStar,
opPlus,
opQuestion,
opBreak,
opReverse,
opLParen
};
// Operator precedence values; assigned by the constructor for
// opStart, opLParen, opOr and opCat nodes, precZero for everything else.
enum OpPrecedence {
precZero,
precStart,
precLParen,
precOpOr,
precOpCat
};
NodeType fType;
RBBINode *fParent;
RBBINode *fLeftChild;
RBBINode *fRightChild;
UnicodeSet *fInputSet; // For uset nodes only.
// Deleted by this node's destructor.
OpPrecedence fPrecedence; // For binary ops only.
UnicodeString fText; // Text corresponding to this node.
// May be lazily evaluated when (if) needed
// for some node types.
int fFirstPos; // Position in the rule source string of the
// first text associated with the node.
// If there's a left child, this will be the same
// as that child's left pos.
int fLastPos; // Last position in the rule source string
// of any text associated with this node.
// If there's a right child, this will be the same
// as that child's last position.
UBool fNullable; // See Aho.
int32_t fVal; // For leafChar nodes, the value.
// Values are the character category,
// corresponds to columns in the final
// state transition table.
UBool fLookAheadEnd; // For endMark nodes, set TRUE if
// marking the end of a look-ahead rule.
// The three vectors below are allocated by the constructors and
// deleted by the destructor.
UVector *fFirstPosSet;
UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion.
UVector *fFollowPos;
RBBINode(NodeType t);
// Copies the scalar fields only; the copy starts with no parent or children.
RBBINode(const RBBINode &other);
// Deletes child subtrees too, except for varRef and setRef nodes.
~RBBINode();
// Copy of this subtree, with variable references replaced by copies
// of their definitions.  uset nodes are shared, not copied.
RBBINode *cloneTree();
// Replaces variable references in place; returns the resulting root.
RBBINode *flattenVariables();
// Replaces setRef children with copies of the set's expression tree.
void flattenSets();
// Appends all nodes of the given kind in this subtree to dest.
void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
#ifdef RBBI_DEBUG
void printNode();
void printTree(UBool withHeading);
#endif
private:
RBBINode &operator = (const RBBINode &other); // No defs.
UBool operator == (const RBBINode &other); // Private, so these functions won't accidentally be used.
#ifdef RBBI_DEBUG
int fSerialNum; // Debugging aids.
#endif
};
#ifdef RBBI_DEBUG
U_CFUNC void
RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0);
#endif
U_NAMESPACE_END
#endif

Просмотреть файл

@ -0,0 +1,318 @@
//
// file: rbbirb.cpp
//
// Copyright (C) 2002-2011, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains the RBBIRuleBuilder class implementation. This is the main class for
// building (compiling) break rules into the tables required by the runtime
// RBBI engine.
//
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/brkiter.h"
#include "unicode/rbbi.h"
#include "unicode/ubrk.h"
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
#include "cmemory.h"
#include "cstring.h"
#include "rbbirb.h"
#include "rbbinode.h"
#include "rbbiscan.h"
#include "rbbisetb.h"
#include "rbbitblb.h"
#include "rbbidata.h"
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------------------
//
// Constructor.
//
//----------------------------------------------------------------------------------------
// Constructor.  Records the rule text, parse-error and status destinations,
// clears all tree / table pointers, and - unless status already indicates a
// failure on entry - allocates the helper objects (set-node list, rule
// status list, scanner, set builder).
// On allocation failure status is set to U_MEMORY_ALLOCATION_ERROR.
// A non-NULL parseErr is zeroed regardless of the incoming status.
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString   &rules,
                                       UParseError     *parseErr,
                                       UErrorCode      &status)
 : fRules(rules)
{
    fStatus     = &status; // status is checked below
    fParseError = parseErr;
#ifdef RBBI_DEBUG
    fDebugEnv   = getenv("U_RBBIDEBUG");
#else
    fDebugEnv   = NULL;
#endif

    // Parse trees; rules go to the forward tree unless directed otherwise.
    fForwardTree = NULL;
    fReverseTree = NULL;
    fSafeFwdTree = NULL;
    fSafeRevTree = NULL;
    fDefaultTree = &fForwardTree;

    // Table builders, created later by createRuleBasedBreakIterator().
    fForwardTables = NULL;
    fReverseTables = NULL;
    fSafeFwdTables = NULL;
    fSafeRevTables = NULL;

    // Rule options.
    fChainRules         = FALSE;
    fLBCMNoChain        = FALSE;
    fLookAheadHardBreak = FALSE;

    // Helper objects, allocated below when status permits.
    fUSetNodes      = NULL;
    fRuleStatusVals = NULL;
    fScanner        = NULL;
    fSetBuilder     = NULL;

    if (parseErr != NULL) {
        uprv_memset(parseErr, 0, sizeof(UParseError));
    }

    if (U_FAILURE(status)) {
        return;
    }

    fUSetNodes      = new UVector(status); // bcos status gets overwritten here
    fRuleStatusVals = new UVector(status);
    fScanner        = new RBBIRuleScanner(this);
    fSetBuilder     = new RBBISetBuilder(this);
    if (U_FAILURE(status)) {
        return;
    }

    const UBool allocationFailed =
        (fUSetNodes == 0 || fRuleStatusVals == 0 || fScanner == 0 || fSetBuilder == 0);
    if (allocationFailed) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
//----------------------------------------------------------------------------------------
//
// Destructor
//
//----------------------------------------------------------------------------------------
// Destructor.  Deletes the uset nodes held in fUSetNodes, then every helper
// object, table builder and parse tree owned by the builder.
RBBIRuleBuilder::~RBBIRuleBuilder() {
    // The constructor returns early, leaving fUSetNodes NULL, when it is
    // entered with a failing status (and 'new' itself may fail), so the list
    // must be checked before it is walked.
    if (fUSetNodes != NULL) {
        // Walk the list until elementAt() yields NULL.
        for (int32_t i=0; ; i++) {
            RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
            if (n==NULL) {
                break;
            }
            delete n;
        }
    }

    delete fUSetNodes;
    delete fSetBuilder;
    delete fForwardTables;
    delete fReverseTables;
    delete fSafeFwdTables;
    delete fSafeRevTables;

    delete fForwardTree;
    delete fReverseTree;
    delete fSafeFwdTree;
    delete fSafeRevTree;
    delete fScanner;
    delete fRuleStatusVals;
}
//----------------------------------------------------------------------------------------
//
// flattenData() - Collect up the compiled RBBI rule data and put it into
// the format for saving in ICU data files,
// which is also the format needed by the RBBI runtime engine.
//
//----------------------------------------------------------------------------------------
// Round i up to the next multiple of 8 (values already on an 8 byte
// boundary are returned unchanged).
static int32_t align8(int32_t i) {
    const int32_t bumped = i + 7;
    return bumped & 0xfffffff8;
}
// Collect the compiled rule data into one contiguous, zero-initialized
// memory image in the run-time format: an RBBIDataHeader followed by the
// four state tables, the trie, the rule status table and the stripped rule
// source, each section padded to an 8 byte boundary.
//
// Returns the newly allocated image (allocated with uprv_malloc; ownership
// passes to the caller), or NULL if *fStatus indicates a failure on entry
// or is set to one here.
RBBIDataHeader *RBBIRuleBuilder::flattenData() {
    int32_t    i;

    if (U_FAILURE(*fStatus)) {
        return NULL;
    }

    // Remove comments and whitespace from the rules to make it smaller.
    UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));

    // Calculate the size of each section in the data.
    //   Sizes here are padded up to a multiple of 8 for better memory alignment.
    //
    int32_t headerSize        = align8(sizeof(RBBIDataHeader));
    int32_t forwardTableSize  = align8(fForwardTables->getTableSize());
    int32_t reverseTableSize  = align8(fReverseTables->getTableSize());
    int32_t safeFwdTableSize  = align8(fSafeFwdTables->getTableSize());
    int32_t safeRevTableSize  = align8(fSafeRevTables->getTableSize());
    int32_t trieSize          = align8(fSetBuilder->getTrieSize());
    int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
    // The "+1" leaves room for a terminating zero after the rule text.
    int32_t rulesSize         = align8((strippedRules.length()+1) * sizeof(UChar));

    int32_t totalSize         = headerSize + forwardTableSize + reverseTableSize
                                + safeFwdTableSize + safeRevTableSize
                                + statusTableSize + trieSize + rulesSize;

    RBBIDataHeader *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
    if (data == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memset(data, 0, totalSize);

    // Fill in the header.  Sections are laid out back to back in the order
    // forward, reverse, safe forward, safe reverse, trie, status table, rules.
    data->fMagic            = 0xb1a0;
    data->fFormatVersion[0] = 3;
    data->fFormatVersion[1] = 1;
    data->fFormatVersion[2] = 0;
    data->fFormatVersion[3] = 0;

    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

    data->fFTable        = headerSize;
    data->fFTableLen     = forwardTableSize;
    data->fRTable        = data->fFTable  + forwardTableSize;
    data->fRTableLen     = reverseTableSize;
    data->fSFTable       = data->fRTable  + reverseTableSize;
    data->fSFTableLen    = safeFwdTableSize;
    data->fSRTable       = data->fSFTable + safeFwdTableSize;
    data->fSRTableLen    = safeRevTableSize;

    data->fTrie          = data->fSRTable + safeRevTableSize;
    // Unlike the table lengths above, the trie length is stored unpadded.
    data->fTrieLen       = fSetBuilder->getTrieSize();
    data->fStatusTable   = data->fTrie    + trieSize;
    data->fStatusTableLen= statusTableSize;
    data->fRuleSource    = data->fStatusTable + statusTableSize;
    // The stored rule length does not include the terminating zero.
    data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);

    uprv_memset(data->fReserved, 0, sizeof(data->fReserved));

    // Copy each section into place.
    fForwardTables->exportTable((uint8_t *)data + data->fFTable);
    fReverseTables->exportTable((uint8_t *)data + data->fRTable);
    fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
    fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
    fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);

    int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
    for (i=0; i<fRuleStatusVals->size(); i++) {
        ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
    }

    // The rules section holds exactly rulesSize bytes, i.e.
    // rulesSize/sizeof(UChar) code units.  The capacity passed here used to
    // be one UChar larger than the space actually reserved.
    strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource),
                          rulesSize/(int32_t)sizeof(UChar), *fStatus);
    if (U_FAILURE(*fStatus)) {
        // Callers treat a failing status as "no data was returned", so
        // release the image here rather than leaking it.
        uprv_free(data);
        return NULL;
    }

    return data;
}
//----------------------------------------------------------------------------------------
//
//  createRuleBasedBreakIterator    construct from source rules that are passed in
//                                  in a UnicodeString
//
//  Runs the whole rule-compilation pipeline: scan/parse the rules, build the
//  character categories, build the four state tables, flatten everything into
//  the run-time data image, and wrap that image in a RuleBasedBreakIterator.
//
//  @param rules       The break rules, in source form.
//  @param parseError  Handed to the builder, which keeps it for error reporting.
//  @param status      In/out ICU error code.  Set to a failure code on any error.
//  @return            The new break iterator, or NULL on failure.
//
//----------------------------------------------------------------------------------------
BreakIterator *
RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString    &rules,
                                    UParseError      *parseError,
                                    UErrorCode       &status)
{
    // status checked below

    //
    // Read the input rules, generate a parse tree, symbol table,
    // and list of all Unicode Sets referenced by the rules.
    //
    RBBIRuleBuilder  builder(rules, parseError, status);
    if (U_FAILURE(status)) { // status checked here bcos build below doesn't
        return NULL;
    }
    builder.fScanner->parse();
    if (U_FAILURE(*builder.fStatus)) {
        // The scanner reported an error.  Stop here rather than running the
        // set and table builders over a parse tree that was rejected.
        return NULL;
    }

    //
    // UnicodeSet processing.
    //    Munge the Unicode Sets to create a set of character categories.
    //    Generate the mapping tables (TRIE) from input 32-bit characters to
    //    the character categories.
    //
    builder.fSetBuilder->build();

    //
    //   Generate the DFA state transition table.
    //
    builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
    builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
    builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
    builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
    if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
        builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
    {
        // At least one allocation failed: release whichever ones succeeded and
        // reset the pointers so nothing is left dangling in the builder.
        status = U_MEMORY_ALLOCATION_ERROR;
        delete builder.fForwardTables; builder.fForwardTables = NULL;
        delete builder.fReverseTables; builder.fReverseTables = NULL;
        delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
        delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
        return NULL;
    }

    builder.fForwardTables->build();
    builder.fReverseTables->build();
    builder.fSafeFwdTables->build();
    builder.fSafeRevTables->build();

#ifdef RBBI_DEBUG
    if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
        builder.fForwardTables->printRuleStatusTable();
    }
#endif

    //
    //   Package up the compiled data into a memory image
    //      in the run-time format.
    //
    RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
    if (U_FAILURE(*builder.fStatus)) {
        return NULL;
    }

    //
    //  Create a break iterator from the compiled rules.
    //     (Identical to creation from stored pre-compiled rules)
    //
    // status is checked after init in construction.
    RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
    if (U_FAILURE(status)) {
        delete This;
        This = NULL;
    }
    else if(This == NULL) { // test for NULL
        // NOTE(review): `data` is not released on this path; confirm whether
        // the flattened image needs to be freed here when the iterator itself
        // could not be allocated.
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return This;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

Просмотреть файл

@ -0,0 +1,211 @@
//
// rbbirb.h
//
// Copyright (C) 2002-2008, International Business Machines Corporation and others.
// All Rights Reserved.
//
// This file contains declarations for several classes from the
// Rule Based Break Iterator rule builder.
//
#ifndef RBBIRB_H
#define RBBIRB_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
#include "unicode/parseerr.h"
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
U_NAMESPACE_BEGIN
class RBBIRuleScanner;
struct RBBIRuleTableEl;
class RBBISetBuilder;
class RBBINode;
class RBBITableBuilder;
//--------------------------------------------------------------------------------
//
// RBBISymbolTable. Implements SymbolTable interface that is used by the
// UnicodeSet parser to resolve references to $variables.
//
//--------------------------------------------------------------------------------
// One entry of the RBBI symbol table.  Pairs a string key with the RBBINode
// stored for it; the symbol table's hash table holds one of these per entry.
// NOTE(review): presumably `key` is the $variable name and `val` the parse
// tree for its definition - confirm against the implementation file.
class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
public: // of these structs for each entry.
RBBISymbolTableEntry();
UnicodeString key;
RBBINode *val;
~RBBISymbolTableEntry();
private:
// Declared but private, so that instances cannot be copied or assigned.
RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
};
// Symbol table for the rule builder.  Implements the SymbolTable interface so
// that it can be handed to the UnicodeSet parser, and adds node-level lookup
// and insertion for the rule scanner's own use.
class RBBISymbolTable : public UMemory, public SymbolTable {
private:
const UnicodeString &fRules;
UHashtable *fHashTable;
RBBIRuleScanner *fRuleScanner;
// These next two fields are part of the mechanism for passing references to
// already-constructed UnicodeSets back to the UnicodeSet constructor
// when the pattern includes $variable references.
const UnicodeString ffffString; // presumably the single character U+FFFF - confirm in the constructor
UnicodeSet *fCachedSetLookup;
public:
// API inherited from class SymbolTable
virtual const UnicodeString* lookup(const UnicodeString& s) const;
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
virtual UnicodeString parseReference(const UnicodeString& text,
ParsePosition& pos, int32_t limit) const;
// Additional Functions
RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
virtual ~RBBISymbolTable();
// Look up / insert the RBBINode stored under a key.
virtual RBBINode *lookupNode(const UnicodeString &key) const;
virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err);
#ifdef RBBI_DEBUG
virtual void rbbiSymtablePrint() const;
#else
// Non-debug builds have no rbbiSymtablePrint() member.  Instead the name is a
// macro, so a call site written as  obj->rbbiSymtablePrint();  expands to a
// harmless assignment to fFakeField and still compiles.
// Note that the macro is not scoped to this class; it applies to every later
// use of the identifier in any file that includes this header.
int32_t fFakeField;
#define rbbiSymtablePrint() fFakeField=0;
#endif
private:
// Declared but private, so that instances cannot be copied or assigned.
RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
};
//--------------------------------------------------------------------------------
//
// class RBBIRuleBuilder The top-level class handling RBBI rule compiling.
//
//--------------------------------------------------------------------------------
// Holds the state shared by the stages of RBBI rule compilation (scanner, set
// builder, table builders) and exposes the static entry point that drives them.
class RBBIRuleBuilder : public UMemory {
public:
// Create a rule based break iterator from a set of rules.
// This function is the main entry point into the rule builder. The
// public ICU API for creating RBBIs uses this function to do the actual work.
//
static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules,
UParseError *parseError,
UErrorCode &status);
public:
// The "public" functions and data members that appear below are accessed
// (and shared) by the various parts that make up the rule builder. They
// are NOT intended to be accessed by anything outside of the
// rule builder implementation.
RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status
);
virtual ~RBBIRuleBuilder();
char *fDebugEnv; // controls debug trace output
UErrorCode *fStatus; // Error reporting. Keeping status
UParseError *fParseError; // here avoids passing it everywhere.
const UnicodeString &fRules; // The rule string that we are compiling
RBBIRuleScanner *fScanner; // The scanner.
RBBINode *fForwardTree; // The parse trees, generated by the scanner,
RBBINode *fReverseTree; // then manipulated by subsequent steps.
RBBINode *fSafeFwdTree;
RBBINode *fSafeRevTree;
RBBINode **fDefaultTree; // For rules not qualified with a !
// the tree to which they belong.
UBool fChainRules; // True for chained Unicode TR style rules.
// False for traditional regexp rules.
UBool fLBCMNoChain; // True: suppress chaining of rules on
// chars with LineBreak property == CM.
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
// immediate break, no continuing for the
// longest match.
RBBISetBuilder *fSetBuilder; // Set and Character Category builder.
UVector *fUSetNodes; // Vector of all uset nodes.
RBBITableBuilder *fForwardTables; // State transition tables, one per parse tree.
RBBITableBuilder *fReverseTables;
RBBITableBuilder *fSafeFwdTables;
RBBITableBuilder *fSafeRevTables;
UVector *fRuleStatusVals; // The values that can be returned
// from getRuleStatus().
RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
// data tables.  Returns NULL on error.
private:
// Declared but private, so that instances cannot be copied or assigned.
RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
};
//----------------------------------------------------------------------------
//
// RBBISetTableEl is an entry in the hash table of UnicodeSets that have
// been encountered. The val Node will be of nodetype uset
// and contain pointers to the actual UnicodeSets.
// The Key is the source string for initializing the set.
//
// The hash table is used to avoid creating duplicate
// unnamed (not $var references) UnicodeSets.
//
// Memory Management:
// The Hash Table owns these RBBISetTableEl structs and
// the key strings. It does NOT own the val nodes.
//
//----------------------------------------------------------------------------
struct RBBISetTableEl {
UnicodeString *key; // Source string the set was initialized from; owned by the hash table.
RBBINode *val; // The uset node for that set; NOT owned by the hash table.
};
//----------------------------------------------------------------------------
//
// RBBIDebugPrintf Printf equivalent, for debugging output.
// Conditional compilation of the implementation lets us
// get rid of the stdio dependency in environments where it
// is unavailable.
//
//----------------------------------------------------------------------------
#ifdef RBBI_DEBUG
// Debug builds: forward straight to the stdio functions.
#include <stdio.h>
#define RBBIDebugPrintf printf
#define RBBIDebugPuts puts
#else
// Non-debug builds: RBBIDebugPrintf is left undefined, so any use of it must
// itself sit inside an RBBI_DEBUG conditional.  RBBIDebugPuts(arg) expands to
// nothing, so it may be called unconditionally.
#undef RBBIDebugPrintf
#define RBBIDebugPuts(arg)
#endif
U_NAMESPACE_END
#endif

Просмотреть файл

@ -0,0 +1,275 @@
//---------------------------------------------------------------------------------
//
// Generated Header File. Do not edit by hand.
// This file contains the state table for the ICU Rule Based Break Iterator
// rule parser.
// It is generated by the Perl script "rbbicst.pl" from
// the rule parser state definitions file "rbbirpt.txt".
//
// Copyright (C) 2002-2005 International Business Machines Corporation
// and others. All rights reserved.
//
//---------------------------------------------------------------------------------
#ifndef RBBIRPT_H
#define RBBIRPT_H
U_NAMESPACE_BEGIN
//
// Character classes for RBBI rule scanning.
// These are the class-index values (128 and up) that appear in the
// fCharClass column of gRuleParseStateTable.
//
static const uint8_t kRuleSet_digit_char = 128;
static const uint8_t kRuleSet_name_char = 129;
static const uint8_t kRuleSet_name_start_char = 130;
static const uint8_t kRuleSet_rule_char = 131;
static const uint8_t kRuleSet_white_space = 132;
// Actions that a row of the rule parser state table can name in its fAction
// field.  The enumerators take the default consecutive values starting at 0.
// NOTE(review): rbbiLastAction looks like an end-of-list marker rather than a
// real action - confirm against the scanner.
enum RBBI_RuleParseAction {
doCheckVarDef,
doDotAny,
doEndAssign,
doEndOfRule,
doEndVariableName,
doExit,
doExprCatOperator,
doExprFinished,
doExprOrOperator,
doExprRParen,
doExprStart,
doLParen,
doNOP,
doOptionEnd,
doOptionStart,
doReverseDir,
doRuleChar,
doRuleError,
doRuleErrorAssignExpr,
doScanUnicodeSet,
doSlash,
doStartAssign,
doStartTagValue,
doStartVariableName,
doTagDigit,
doTagExpectedError,
doTagValue,
doUnaryOpPlus,
doUnaryOpQuestion,
doUnaryOpStar,
doVariableNameExpectedErr,
rbbiLastAction};
//-------------------------------------------------------------------------------
//
// RBBIRuleTableEl represents the structure of a row in the transition table
// for the rule parser state machine.
//-------------------------------------------------------------------------------
struct RBBIRuleTableEl {
RBBI_RuleParseAction fAction; // Action named by this row.
uint8_t fCharClass; // 0-127: an individual ASCII character
// 128-255: character class index
uint8_t fNextState; // 0-250: normal next-state numbers
// 255: pop next-state from stack.
uint8_t fPushState; // presumably a state number to push on that stack (0 in most rows) - confirm
UBool fNextChar; // presumably whether to advance to the next input character - confirm
};
// State transition table for the rule parser.
// Columns, in RBBIRuleTableEl field order:
//     { fAction, fCharClass, fNextState, fPushState, fNextChar }
// The trailing comment on each row is the row index; where a name follows the
// index, that row is the first row of the state with that name.  State numbers
// used in fNextState / fPushState are row indexes into this table.
// fCharClass values 252, 254 and 255 are not ordinary class indexes defined in
// this header; presumably they are special cases handled by the scanner -
// TODO confirm.
// NOTE(review): row 50 (white space in expr-cont-no-slash) goes to state 35
// (expr-cont), whereas the matching white-space rows 36 and 70 loop back to
// their own state.  This file is generated, so check rbbirpt.txt to see
// whether that is intended.
static const struct RBBIRuleTableEl gRuleParseStateTable[] = {
{doNOP, 0, 0, 0, TRUE}
, {doExprStart, 254, 21, 8, FALSE} // 1 start
, {doNOP, 132, 1,0, TRUE} // 2
, {doExprStart, 36 /* $ */, 80, 90, FALSE} // 3
, {doNOP, 33 /* ! */, 11,0, TRUE} // 4
, {doNOP, 59 /* ; */, 1,0, TRUE} // 5
, {doNOP, 252, 0,0, FALSE} // 6
, {doExprStart, 255, 21, 8, FALSE} // 7
, {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 8 break-rule-end
, {doNOP, 132, 8,0, TRUE} // 9
, {doRuleError, 255, 95,0, FALSE} // 10
, {doNOP, 33 /* ! */, 13,0, TRUE} // 11 rev-option
, {doReverseDir, 255, 20, 8, FALSE} // 12
, {doOptionStart, 130, 15,0, TRUE} // 13 option-scan1
, {doRuleError, 255, 95,0, FALSE} // 14
, {doNOP, 129, 15,0, TRUE} // 15 option-scan2
, {doOptionEnd, 255, 17,0, FALSE} // 16
, {doNOP, 59 /* ; */, 1,0, TRUE} // 17 option-scan3
, {doNOP, 132, 17,0, TRUE} // 18
, {doRuleError, 255, 95,0, FALSE} // 19
, {doExprStart, 255, 21, 8, FALSE} // 20 reverse-rule
, {doRuleChar, 254, 30,0, TRUE} // 21 term
, {doNOP, 132, 21,0, TRUE} // 22
, {doRuleChar, 131, 30,0, TRUE} // 23
, {doNOP, 91 /* [ */, 86, 30, FALSE} // 24
, {doLParen, 40 /* ( */, 21, 30, TRUE} // 25
, {doNOP, 36 /* $ */, 80, 29, FALSE} // 26
, {doDotAny, 46 /* . */, 30,0, TRUE} // 27
, {doRuleError, 255, 95,0, FALSE} // 28
, {doCheckVarDef, 255, 30,0, FALSE} // 29 term-var-ref
, {doNOP, 132, 30,0, TRUE} // 30 expr-mod
, {doUnaryOpStar, 42 /* * */, 35,0, TRUE} // 31
, {doUnaryOpPlus, 43 /* + */, 35,0, TRUE} // 32
, {doUnaryOpQuestion, 63 /* ? */, 35,0, TRUE} // 33
, {doNOP, 255, 35,0, FALSE} // 34
, {doExprCatOperator, 254, 21,0, FALSE} // 35 expr-cont
, {doNOP, 132, 35,0, TRUE} // 36
, {doExprCatOperator, 131, 21,0, FALSE} // 37
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 38
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 39
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 40
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 41
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 42
, {doExprCatOperator, 123 /* { */, 59,0, TRUE} // 43
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 44
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 45
, {doExprFinished, 255, 255,0, FALSE} // 46
, {doSlash, 47 /* / */, 49,0, TRUE} // 47 look-ahead
, {doNOP, 255, 95,0, FALSE} // 48
, {doExprCatOperator, 254, 21,0, FALSE} // 49 expr-cont-no-slash
, {doNOP, 132, 35,0, TRUE} // 50
, {doExprCatOperator, 131, 21,0, FALSE} // 51
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 52
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 53
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 54
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 55
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 56
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 57
, {doExprFinished, 255, 255,0, FALSE} // 58
, {doNOP, 132, 59,0, TRUE} // 59 tag-open
, {doStartTagValue, 128, 62,0, FALSE} // 60
, {doTagExpectedError, 255, 95,0, FALSE} // 61
, {doNOP, 132, 66,0, TRUE} // 62 tag-value
, {doNOP, 125 /* } */, 66,0, FALSE} // 63
, {doTagDigit, 128, 62,0, TRUE} // 64
, {doTagExpectedError, 255, 95,0, FALSE} // 65
, {doNOP, 132, 66,0, TRUE} // 66 tag-close
, {doTagValue, 125 /* } */, 69,0, TRUE} // 67
, {doTagExpectedError, 255, 95,0, FALSE} // 68
, {doExprCatOperator, 254, 21,0, FALSE} // 69 expr-cont-no-tag
, {doNOP, 132, 69,0, TRUE} // 70
, {doExprCatOperator, 131, 21,0, FALSE} // 71
, {doExprCatOperator, 91 /* [ */, 21,0, FALSE} // 72
, {doExprCatOperator, 40 /* ( */, 21,0, FALSE} // 73
, {doExprCatOperator, 36 /* $ */, 21,0, FALSE} // 74
, {doExprCatOperator, 46 /* . */, 21,0, FALSE} // 75
, {doExprCatOperator, 47 /* / */, 47,0, FALSE} // 76
, {doExprOrOperator, 124 /* | */, 21,0, TRUE} // 77
, {doExprRParen, 41 /* ) */, 255,0, TRUE} // 78
, {doExprFinished, 255, 255,0, FALSE} // 79
, {doStartVariableName, 36 /* $ */, 82,0, TRUE} // 80 scan-var-name
, {doNOP, 255, 95,0, FALSE} // 81
, {doNOP, 130, 84,0, TRUE} // 82 scan-var-start
, {doVariableNameExpectedErr, 255, 95,0, FALSE} // 83
, {doNOP, 129, 84,0, TRUE} // 84 scan-var-body
, {doEndVariableName, 255, 255,0, FALSE} // 85
, {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 86 scan-unicode-set
, {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 87
, {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 88
, {doNOP, 255, 95,0, FALSE} // 89
, {doNOP, 132, 90,0, TRUE} // 90 assign-or-rule
, {doStartAssign, 61 /* = */, 21, 93, TRUE} // 91
, {doNOP, 255, 29, 8, FALSE} // 92
, {doEndAssign, 59 /* ; */, 1,0, TRUE} // 93 assign-end
, {doRuleErrorAssignExpr, 255, 95,0, FALSE} // 94
, {doExit, 255, 95,0, TRUE} // 95 errorDeath
};
#ifdef RBBI_DEBUG
// Debug-only table of parser state names, indexed by row number of
// gRuleParseStateTable.  An entry is the state's name at the row where that
// state begins, and 0 at every other row.  The array has one extra trailing
// 0 entry after "errorDeath" (index 95).
static const char * const RBBIRuleStateNames[] = { 0,
"start",
0,
0,
0,
0,
0,
0,
"break-rule-end",
0,
0,
"rev-option",
0,
"option-scan1",
0,
"option-scan2",
0,
"option-scan3",
0,
0,
"reverse-rule",
"term",
0,
0,
0,
0,
0,
0,
0,
"term-var-ref",
"expr-mod",
0,
0,
0,
0,
"expr-cont",
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
"look-ahead",
0,
"expr-cont-no-slash",
0,
0,
0,
0,
0,
0,
0,
0,
0,
"tag-open",
0,
0,
"tag-value",
0,
0,
0,
"tag-close",
0,
0,
"expr-cont-no-tag",
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
"scan-var-name",
0,
"scan-var-start",
0,
"scan-var-body",
0,
"scan-unicode-set",
0,
0,
0,
"assign-or-rule",
0,
0,
"assign-end",
0,
"errorDeath",
0};
#endif
U_NAMESPACE_END
#endif

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше