зеркало из https://github.com/mozilla/pjs.git
683 строки
27 KiB
C
683 строки
27 KiB
C
|
/*
|
||
|
*****************************************************************************************
|
||
|
* *
|
||
|
* COPYRIGHT: *
|
||
|
* (C) Copyright Taligent, Inc., 1997 *
|
||
|
* (C) Copyright International Business Machines Corporation, 1996 *
|
||
|
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
|
||
|
* US Government Users Restricted Rights - Use, duplication, or disclosure *
|
||
|
* restricted by GSA ADP Schedule Contract with IBM Corp. *
|
||
|
* *
|
||
|
*****************************************************************************************
|
||
|
*
|
||
|
* FILE NAME : unistring.h
|
||
|
*
|
||
|
* Modification History:
|
||
|
*
|
||
|
* Date Name Description
|
||
|
* 02/05/97 aliu Added UnicodeString streamIn and streamOut methods.
|
||
|
* 03/26/97 aliu Added indexOf(UniChar,).
|
||
|
* 04/24/97 aliu Numerous changes per code review.
|
||
|
* 05/06/97 helena Added isBogus().
|
||
|
*****************************************************************************************
|
||
|
*/
|
||
|
|
||
|
#ifndef _UNISTRING
|
||
|
#define _UNISTRING
|
||
|
|
||
|
#include <limits.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <iostream.h>
|
||
|
#include <stdio.h>
|
||
|
#include "ptypes.h"
|
||
|
class Locale;
|
||
|
|
||
|
/**
|
||
|
* Simple Unicode string class. This is a simple class that encapsulates a
|
||
|
* Unicode string, allowing the user to manipulate it and allowing it to grow
|
||
|
* and shrink without the user having to worry about this.
|
||
|
* <P>
|
||
|
* The char* interfaces on this class work with either the Latin1 (ISO 8859-1)
|
||
|
* character set or a host character set. The host character set may be any
|
||
|
* 8-bit character set for which TPlatformUtilities::mapHostTo8859_1() and
|
||
|
* TPlatformUtilities::map8859_1ToHost() have been defined; the default
|
||
|
* implementation maps to and from EBCDIC as defined in RFC 1345. If the
|
||
|
* host character set is used, then incoming characters are mapped to Unicode,
|
||
|
* and outgoing characters are mapped back to the host character set.
|
||
|
* <P>
|
||
|
* All inbound transcoding of char* data is done by zero-extending the incoming
|
||
|
* characters, and all outbound transcoding is done by truncating the top byte
|
||
|
* from the characters.
|
||
|
*/
|
||
|
#ifdef NLS_MAC
|
||
|
#pragma export on
|
||
|
#endif
|
||
|
|
||
|
class T_UTILITY_API UnicodeString {
|
||
|
public:
|
||
|
/**
|
||
|
* Standard operator new. This function is only provided because the
|
||
|
* special operator new would otherwise hide it. This function just
|
||
|
* turns around and calls the global operator new function.
|
||
|
*/
|
||
|
void* operator new(size_t size);
|
||
|
|
||
|
/**
|
||
|
* Placement new. This version of operator new just returns the "location"
|
||
|
* parameter unchanged as its result. It ignores the "size" parameter.
|
||
|
* This function is here only to allow stack allocation of UnicodeStrings
|
||
|
* through the C wrapper interface. DO NOT CALL THIS FUNCTION FROM C++
|
||
|
* UNLESS YOU'RE SURE YOU KNOW WHAT YOU'RE DOING!
|
||
|
* @param size Ignored. There's no way this function can check the size
|
||
|
* of the block you pass to it. This function trusts you've
|
||
|
* allocated enough space at that location to hold a Unicode-
|
||
|
* String object.
|
||
|
* @param location The location where you want the new UnicodeString to
|
||
|
* be stored. Typically this will be a local variable on
|
||
|
* the stack. This function trusts that there's enough
|
||
|
* space at that location to hold a UnicodeString object.
|
||
|
* @return Whatever was passed in for "location".
|
||
|
*/
|
||
|
void* operator new(size_t size, void* location);
|
||
|
|
||
|
UnicodeString();
|
||
|
UnicodeString(const UnicodeString& that);
|
||
|
UnicodeString(const UniChar* that);
|
||
|
UnicodeString(const UniChar* that,
|
||
|
t_int32 thatLength);
|
||
|
UnicodeString(const char* that); // Must be null-terminated
|
||
|
|
||
|
/**
|
||
|
* External-buffer constructor. This constructor allows UnicodeString to
|
||
|
* use storage provided by the client as its character buffer, rather than
|
||
|
* allocating its own storage. The client passes a pointer to the storage,
|
||
|
* along with the number of characters currently stored in it (we don't
|
||
|
* use null termination to determine the string length, and the string is
|
||
|
* not ever guaranteed to be null-terminated) and the number of characters
|
||
|
* the storage is capable of holding.
|
||
|
* <P>
|
||
|
* WARNING: Do not change the characters in the buffer during the period
|
||
|
* that the UnicodeString is active. Doing so may lead to
|
||
|
* undefined results.
|
||
|
* <P>
|
||
|
* WARNING: If the string grows beyond the capacity of the buffer passed
|
||
|
* to this constructor, UnicodeString will allocate its own storage,
|
||
|
* and no subsequent changes to the UnicodeString will be reflected
|
||
|
* in the buffer passed to this constructor (UnicodeString itself
|
||
|
* will continue to work right, however).
|
||
|
* <P>
|
||
|
* WARNING: The string stored in the client-owned buffer is never guaranteed
|
||
|
* to be null-terminated.
|
||
|
* @param charBuffer A pointer to a range of storage that the new UnicodeString
|
||
|
* should use as its character-storage buffer. The client
|
||
|
* retains responsibility for deleting this storage after
|
||
|
* the UnicodeString goes away.
|
||
|
* @param numCharsInBuffer The number of characters currently stored in charBuffer.
|
||
|
* @param bufferCapacity The number of characters the buffer is capable of
|
||
|
* holding. This must be greater than or equal to
|
||
|
* numCharsInBuffer, but this isn't checked.
|
||
|
*/
|
||
|
UnicodeString(UniChar* charBuffer,
|
||
|
t_int32 numCharsInBuffer,
|
||
|
t_int32 bufferCapacity);
|
||
|
|
||
|
|
||
|
/* Creates a UnicodeString from a given const char* buffer and an
|
||
|
* encoding name.
|
||
|
* Netscape added method.
|
||
|
* <P>
|
||
|
* @param that A null-terminated char buffer in a given encoding
|
||
|
* @param encoding name for the encoding used for buffer
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
UnicodeString(const char* that,
|
||
|
const char* encoding);
|
||
|
/**
 * Destructor. Frees the character buffer with delete [] unless
 * fClientOwnsStorage is set, in which case the buffer is left untouched.
 */
~UnicodeString()
{
    if (fClientOwnsStorage)
        return;                 // buffer is not ours to release

    delete [] fChars;
}
|
||
|
|
||
|
UnicodeString& operator=(const UnicodeString& that);
|
||
|
|
||
|
/**
|
||
|
* Compares a UnicodeString to something else. All versions of compare()
|
||
|
* do bitwise comparison; internationally-sensitive comparison requires
|
||
|
* the Collation library. The offset and length parameters are pinned to
|
||
|
* permissible values if they are out of range.
|
||
|
*/
|
||
|
t_int8 compare(const UnicodeString& that) const;
|
||
|
t_int8 compare(TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
const UnicodeString& that,
|
||
|
TextOffset thatOffset,
|
||
|
t_int32 thatLength) const;
|
||
|
t_int8 compare(const UniChar* that) const; // Must be null-terminated
|
||
|
t_int8 compare(const UniChar* that,
|
||
|
t_int32 thatLength) const;
|
||
|
t_int8 compare(const char* that) const;
|
||
|
|
||
|
/**
|
||
|
* Compares substrings of two UnicodeStrings. Same as compare(), but
|
||
|
* takes starting and ending offsets instead of starting offsets and
|
||
|
* character counts. The characters from the starting offset up to, but
|
||
|
* not including the ending offset are compared. The start and limit
|
||
|
* parameters are pinned to permissible values if they are out of range.
|
||
|
*/
|
||
|
t_int8 compareBetween( TextOffset thisStart,
|
||
|
TextOffset thisLimit,
|
||
|
const UnicodeString& that,
|
||
|
TextOffset thatStart,
|
||
|
TextOffset thatLimit) const;
|
||
|
|
||
|
/**
|
||
|
* Comparison operators. All of these operators map through to compare().
|
||
|
*/
|
||
|
t_bool operator==(const UnicodeString& that) const;
|
||
|
t_bool operator!=(const UnicodeString& that) const;
|
||
|
t_bool operator>(const UnicodeString& that) const;
|
||
|
t_bool operator<(const UnicodeString& that) const;
|
||
|
t_bool operator>=(const UnicodeString& that) const;
|
||
|
t_bool operator<=(const UnicodeString& that) const;
|
||
|
|
||
|
/**
|
||
|
* Returns the offset within this String of the first occurrence of the
|
||
|
* specified substring "that". The search begins with the character at fromOffset
|
||
|
* and examines at most forLength characters. Returns -1 if "that" is not found.
|
||
|
*/
|
||
|
TextOffset indexOf(const UnicodeString& that,
|
||
|
TextOffset fromOffset = 0,
|
||
|
t_uint32 forLength = -1) const;
|
||
|
TextOffset indexOf(UniChar character,
|
||
|
TextOffset fromOffset = 0,
|
||
|
t_uint32 forLength = -1) const;
|
||
|
/**
|
||
|
* Returns the offset within this String of the last occurrence of the
|
||
|
* specified substring "that". The search begins with the character before fromOffset
|
||
|
* and examines at most forLength characters (moving backward from fromOffset).
|
||
|
* Returns -1 if "that" is not found.
|
||
|
*/
|
||
|
TextOffset lastIndexOf(const UnicodeString& that,
|
||
|
TextOffset fromOffset = T_INT32_MAX,
|
||
|
t_uint32 forLength = -1) const;
|
||
|
TextOffset lastIndexOf(UniChar character,
|
||
|
TextOffset fromOffset = T_INT32_MAX,
|
||
|
t_uint32 forLength = -1) const;
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns true if "that" appears in its entirety at the beginning of "this"
|
||
|
*/
|
||
|
t_bool startsWith(const UnicodeString& that) const;
|
||
|
|
||
|
/**
|
||
|
* Returns true if "that" appears in its entirety at the end of "this"
|
||
|
*/
|
||
|
t_bool endsWith(const UnicodeString& that) const;
|
||
|
|
||
|
/**
|
||
|
* Stores in "that" a copy of "this" that has had leading and trailing whitespace
|
||
|
* removed from it. "this" itself is unaffected.
|
||
|
*/
|
||
|
UnicodeString& trim(UnicodeString& that) const;
|
||
|
|
||
|
/**
|
||
|
* Trims leading and trailing whitespace from this UnicodeString.
|
||
|
*/
|
||
|
void trim();
|
||
|
|
||
|
/**
|
||
|
* If the string is shorter than targetLength, adds enough copies of padChar to the
|
||
|
* beginning to make the length targetLength and returns true; otherwise returns false.
|
||
|
*/
|
||
|
t_bool padLeading( t_int32 targetLength,
|
||
|
UniChar padChar = ' ');
|
||
|
|
||
|
/**
|
||
|
* If the string is shorter than targetLength, adds enough copies of padChar to the
|
||
|
* end to make the length targetLength and returns true; otherwise returns false.
|
||
|
*/
|
||
|
t_bool padTrailing(t_int32 targetLength,
|
||
|
UniChar padChar = ' ');
|
||
|
|
||
|
/**
|
||
|
* If the string is longer than targetLength, deletes enough characters from the
|
||
|
* end to make the length targetLength and returns true; otherwise returns false.
|
||
|
*/
|
||
|
t_bool truncate(t_int32 targetLength);
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Allows UnicodeString to be used with interfaces that use UniChar*.
|
||
|
* Returns a pointer to the UnicodeString's internal storage. This
|
||
|
* storage is still owned by the UnicodeString, and the caller is not
|
||
|
* allowed to change it. The string returned by this function is
|
||
|
* correctly null-terminated.
|
||
|
*/
|
||
|
operator const UniChar*() const;
|
||
|
|
||
|
/**
|
||
|
* Extracts the characters from a UnicodeString without copying. Returns
|
||
|
* a pointer to the UnicodeString's internal storage. The caller
|
||
|
* acquires ownership of this storage and is responsible for deleting
|
||
|
* it. The UnicodeString is set to empty by this operation. WARNING: The
|
||
|
* string returned is not null-terminated unless the caller explicitly
|
||
|
* adds a null character to the end with operator+=().
|
||
|
*/
|
||
|
UniChar* orphanStorage() ;
|
||
|
|
||
|
/**
|
||
|
* Extracts a substring. Extracts the specified substring of the
|
||
|
* UnicodeString into the storage referred to by extractInto. The offset
|
||
|
* and length parameters are pinned to permissible values if they are
|
||
|
* out of range.
|
||
|
* <P>
|
||
|
* NOTE: No null byte is written to UniChar* extractInto. If you want
|
||
|
* extractInto to have a null-terminated string you should do
|
||
|
* extractInto[len]=0, where len is the actual number of characters
|
||
|
* extracted.
|
||
|
*/
|
||
|
UnicodeString& extract( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
UnicodeString& extractInto) const;
|
||
|
void extract( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
UniChar* extractInto) const;
|
||
|
|
||
|
/**
|
||
|
* This version of extract() extracts into an array of char. The
|
||
|
* characters are converted from UniChar to char by truncating the
|
||
|
* high-order byte (in other words, this function assumes the Unicode
|
||
|
* data being converted is all from the Latin1 character set). The
|
||
|
* offset and length parameters are pinned to permissible values if they
|
||
|
* are out of range.
|
||
|
* <P>
|
||
|
* NOTE: No null byte is written. If you want extractInto to have a
|
||
|
* null-terminated string you should do extractInto[len]=0, where len is
|
||
|
* the actual number of characters extracted.
|
||
|
*/
|
||
|
void extract( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
char* extractInto) const;
|
||
|
|
||
|
/**
|
||
|
* Extract a substring. Same as extract(), but the substring is
|
||
|
* specified as starting and ending offsets [start, limit). That is,
|
||
|
* from the starting offset up to, but not including, the ending offset.
|
||
|
* The start and limit parameters are pinned to permissible values if
|
||
|
* they are out of range.
|
||
|
*/
|
||
|
UnicodeString& extractBetween( TextOffset start,
|
||
|
TextOffset limit,
|
||
|
UnicodeString& extractInto) const;
|
||
|
|
||
|
/**
|
||
|
* Return the character at the given offset of this string. If the
|
||
|
* offset is out of range, return 0 (for the const method) or a
|
||
|
* reference to a UniChar having the value 0 (for the non-const method).
|
||
|
*/
|
||
|
UniChar operator[](TextOffset offset) const;
|
||
|
UniChar& operator[](TextOffset offset);
|
||
|
|
||
|
/**
|
||
|
* Append a string or character. The specified string or character is added
|
||
|
* to the end of the string.
|
||
|
*/
|
||
|
UnicodeString& operator+=(const UnicodeString& that);
|
||
|
UnicodeString& operator+=(UniChar that);
|
||
|
|
||
|
/**
|
||
|
* Insert a string. The contents of "that" are inserted into *this so that
|
||
|
* the first character from "that" occurs at thisOffset. If thisOffset is out
|
||
|
* of range, the new characters are added at the end.
|
||
|
*/
|
||
|
UnicodeString& insert( TextOffset thisOffset,
|
||
|
const UnicodeString& that);
|
||
|
|
||
|
/**
|
||
|
* Remove part of this string. remove() with no arguments removes all
|
||
|
* characters of this string. Note: The storage is not removed, but the
|
||
|
* logical length, and possibly the contents, are altered.
|
||
|
*/
|
||
|
UnicodeString& remove();
|
||
|
UnicodeString& remove( TextOffset offset,
|
||
|
t_int32 length = T_INT32_MAX);
|
||
|
|
||
|
/**
|
||
|
* Delete characters. Same as remove(), but the range of characters to
|
||
|
* delete is specified as a pair of starting and ending offsets [start,
|
||
|
* limit), rather than a starting offset and a character count. That is,
|
||
|
* from the starting offset up to, but not including, the ending offset.
|
||
|
* The start and limit parameters are pinned to permissible values if
|
||
|
* they are out of range.
|
||
|
*/
|
||
|
UnicodeString& removeBetween( TextOffset start = 0,
|
||
|
TextOffset limit = T_INT32_MAX);
|
||
|
|
||
|
/**
|
||
|
* Replace characters. Replaces the characters in the range specified by
|
||
|
* thisOffset and thisLength with the characters in "that" (or the specified
|
||
|
* subrange of "that"). All parameters are pinned to permissible values
|
||
|
* if necessary. If the source and replacement text are different lengths,
|
||
|
* the string will be lengthened or shortened as necessary.
|
||
|
*/
|
||
|
UnicodeString& replace( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
const UnicodeString& that,
|
||
|
TextOffset thatOffset = 0,
|
||
|
t_int32 thatLength = T_INT32_MAX);
|
||
|
UnicodeString& replace( TextOffset thisOfset,
|
||
|
t_int32 thisLength,
|
||
|
const UniChar* that);
|
||
|
UnicodeString& replace( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
const UniChar* that,
|
||
|
t_int32 thatLength);
|
||
|
|
||
|
UnicodeString& replace( TextOffset thisOffset,
|
||
|
t_int32 thisLength,
|
||
|
const char* that);
|
||
|
|
||
|
/**
|
||
|
* Replace characters. Same as replace(), but the affected subranges are
|
||
|
* specified as pairs of starting and ending offsets [start, limit)
|
||
|
* rather than starting offsets and lengths. That is, from the starting
|
||
|
* offset up to, but not including, the ending offset. The start and
|
||
|
* limit parameters are pinned to permissible values if they are out of
|
||
|
* range.
|
||
|
*/
|
||
|
UnicodeString& replaceBetween( TextOffset thisStart,
|
||
|
TextOffset thisLimit,
|
||
|
const UnicodeString& that,
|
||
|
TextOffset thatStart = 0,
|
||
|
TextOffset thatLimit = T_INT32_MAX);
|
||
|
|
||
|
/**
|
||
|
* Replaces all occurrences of "oldText" in the string in the range defined by
|
||
|
* fromOffset and forLength with "newText".
|
||
|
*/
|
||
|
void findAndReplace( const UnicodeString& oldText,
|
||
|
const UnicodeString& newText,
|
||
|
TextOffset fromOffset = 0,
|
||
|
t_uint32 forLength = -1);
|
||
|
/**
|
||
|
* Reverse the characters in this string in place. That is, "abcd"
|
||
|
* becomes "dcba". Return a reference to this string.
|
||
|
*/
|
||
|
UnicodeString& reverse();
|
||
|
UnicodeString& reverse(TextOffset from,
|
||
|
TextOffset to);
|
||
|
|
||
|
/**
|
||
|
* Convert this string to uppercase or lowercase. The methods which take
|
||
|
* no arguments use the default Locale. (These methods cannot take a
|
||
|
* default argument of Locale::getDefault() because that would create a
|
||
|
* circular class dependency between UnicodeString and Locale.)
|
||
|
*/
|
||
|
UnicodeString& toUpper();
|
||
|
UnicodeString& toUpper(const Locale& locale);
|
||
|
UnicodeString& toLower();
|
||
|
UnicodeString& toLower(const Locale& locale);
|
||
|
|
||
|
/**
|
||
|
* Return the length of this string. This will always be a non-negative
|
||
|
* number.
|
||
|
*/
|
||
|
t_int32 size() const;
|
||
|
|
||
|
/**
|
||
|
* Return the hash code for this string. This is used by hash tables
|
||
|
* which use this object as a key. The hash code is cached, and
|
||
|
* recomputed when necessary. For this reason, this method may alter the
|
||
|
* physical object, even though it is semantically const.
|
||
|
*/
|
||
|
t_int32 hashCode() const;
|
||
|
|
||
|
/**
|
||
|
* Returns the number of display cells the specified substring takes up.
|
||
|
* This function is designed for Asian text and properly takes into account
|
||
|
* halfwidth and fullwidth variants of various CJK characters and the combining
|
||
|
* behavior of the Hangul Jamo characters (with some limitations; see
|
||
|
* documentation for Unicode::getCellWidth()).
|
||
|
* <P>
|
||
|
* In order to avoid dealing
|
||
|
* with fractions, this function can either be construed to return twice the
|
||
|
* actual number of display cells or to treat a "cell" as the width of a halfwidth
|
||
|
* character rather than the width of a fullwidth character.
|
||
|
* <P>
|
||
|
* The "asian" parameter controls whether characters considered NEUTRAL by
|
||
|
* the Unicode class are treated as halfwidth or fullwidth here. If you set
|
||
|
* "asian" to FALSE, neutrals are treated as halfwidth, and this function returns
|
||
|
* a close approximation of how many Latin display cells the text will take up
|
||
|
* in a monospaced font.
|
||
|
*/
|
||
|
t_int32 numDisplayCells(TextOffset fromOffset = 0,
|
||
|
t_int32 forLength = T_INT32_MAX,
|
||
|
t_bool asian = TRUE) const;
|
||
|
/**
|
||
|
* The streamIn and streamOut methods read and write objects of this
|
||
|
* class as binary, platform-dependent data in the given FILE stream. The stream
* must be opened in binary mode for this to work. These methods are not
|
||
|
* intended for general public use; they are used by the framework to
|
||
|
* improve performance by storing certain objects in binary files.
|
||
|
*/
|
||
|
void streamOut(FILE* os) const;
|
||
|
void streamIn(FILE* is);
|
||
|
|
||
|
/**
|
||
|
* Returns TRUE if the string resize failed. It is very important
|
||
|
* to check if a unicode string is valid after modification.
|
||
|
*/
|
||
|
t_bool isBogus() const;
|
||
|
|
||
|
/*
|
||
|
* Additional Netscape routines
|
||
|
*/
|
||
|
/** Converts the String to a char* using a target encoding */
|
||
|
char* toCString(const char* encoding) const;
|
||
|
|
||
|
/** Compare case-insensitively. Still diacritic-sensitive. Is not locale-sensitive.
|
||
|
* All versions of compare() do bitwise comparison; internationally-
|
||
|
* sensitive comparison requires the Collation library. */
|
||
|
int compareIgnoreCase(const UnicodeString& that) const;
|
||
|
int compareIgnoreCase(const UniChar* that,
|
||
|
t_int32 thatLength) const;
|
||
|
int compareIgnoreCase(const UniChar* that) const;
|
||
|
int compareIgnoreCase(const char* that,
|
||
|
const char* encoding) const;
|
||
|
/* Assumes a LATIN-1 string */
|
||
|
int
|
||
|
compareIgnoreCase(const char* that) const;
|
||
|
|
||
|
|
||
|
private:
|
||
|
/* Netscape Private */
|
||
|
char* toCStringTruncate() const;
|
||
|
|
||
|
static t_int32 lengthOf(const UniChar* chars);
|
||
|
static t_int32 lengthOf(const char* chars);
|
||
|
void resize(t_int32 newLength);
|
||
|
void setToBogus(void);
|
||
|
static void copy( const UniChar* from,
|
||
|
UniChar* to,
|
||
|
t_int32 numChars);
|
||
|
static void copy( const char* from,
|
||
|
UniChar* to,
|
||
|
t_int32 numChars);
|
||
|
static void copy( const UniChar* from,
|
||
|
char* to,
|
||
|
t_int32 numChars);
|
||
|
|
||
|
t_int8 doCompare( const UniChar* thiss,
|
||
|
t_int32 thisLength,
|
||
|
const UniChar* that,
|
||
|
t_int32 thatLength) const;
|
||
|
static const t_int32 kInvalidHashCode;
|
||
|
static const t_int32 kEmptyHashCode;
|
||
|
static UniChar fgErrorChar;
|
||
|
|
||
|
UniChar* fChars;
|
||
|
t_int32 fSize;
|
||
|
t_int32 fCapacity;
|
||
|
t_int32 fHashCode;
|
||
|
t_bool fClientOwnsStorage;
|
||
|
t_bool fBogus;
|
||
|
};
|
||
|
|
||
|
#ifdef NLS_MAC
|
||
|
#pragma export off
|
||
|
#endif
|
||
|
|
||
|
/**
|
||
|
* Write the contents of a UnicodeString to an ostream. This function writes
|
||
|
* the characters in a UnicodeString to an ostream. The UniChars in the
|
||
|
* UnicodeString are truncated to char, leading to undefined results with
|
||
|
* anything not in the Latin1 character set.
|
||
|
*/
|
||
|
NLSUNIAPI_PUBLIC(ostream&) operator<<(ostream& stream,
|
||
|
const UnicodeString& string);
|
||
|
|
||
|
//----------------------------------------------------
|
||
|
// operator new
|
||
|
//----------------------------------------------------
|
||
|
|
||
|
inline void*
|
||
|
UnicodeString::operator new(size_t size)
|
||
|
{
|
||
|
return ::operator new(size);
|
||
|
}
|
||
|
|
||
|
inline void*
|
||
|
UnicodeString::operator new(size_t size, void* location)
|
||
|
{
|
||
|
// WARNING: Do not use this operator unless you're sure you know what you're
|
||
|
// doing! It just passes "location" through blindly. If there isn't enough
|
||
|
// free space at "location" to hold a UnicodeString (or if "location" is
|
||
|
// somehow invalid), you're in trouble!
|
||
|
return location;
|
||
|
}
|
||
|
//----------------------------------------------------
|
||
|
// Fast append
|
||
|
//----------------------------------------------------
|
||
|
|
||
|
/**
 * Appends a single character to the end of this string.
 *
 * @param that The character to append.
 * @return *this.
 */
inline UnicodeString&
UnicodeString::operator+=(UniChar that)
{
    if (fSize >= fCapacity) {
        // No spare room in the current buffer: ask resize() for one more slot.
        // NOTE(review): the store below relies on resize() having set fSize to
        // the requested length on success -- confirm against resize().
        resize(fSize + 1);

        if (!fBogus) // change required for HP-UX
            fChars[fSize - 1] = that;

        return *this;
    }

    // Fast path: there is spare capacity, so store in place and mark the
    // cached hash code as needing recomputation.
    fChars[fSize] = that;
    ++fSize;
    fHashCode = kInvalidHashCode;

    return *this;
}
|
||
|
|
||
|
//----------------------------------------------------
|
||
|
// Character access
|
||
|
//----------------------------------------------------
|
||
|
|
||
|
/**
 * Read-only character access.
 *
 * @param offset Index of the character to read.
 * @return fChars[offset] when offset is in range, otherwise 0.
 */
inline UniChar
UnicodeString::operator[](TextOffset offset) const
{
    // One unsigned comparison covers both failure cases: a negative offset
    // turns into a very large unsigned value and so fails the same test as
    // an offset at or past the end. Assumes fSize >= 0.
    if ((t_uint32)offset >= (t_uint32)fSize)
        return 0;

    return fChars[offset];
}
|
||
|
|
||
|
/**
 * Mutable character access. Returns a reference to the character stored at
 * "offset" so that callers can assign through it (str[i] = ch).
 * <P>
 * If "offset" is out of range, a reference to the static fgErrorChar is
 * returned instead; it is reset to 0 first in case an earlier caller wrote
 * through it.
 *
 * @param offset Index of the character to access.
 * @return A reference to fChars[offset], or to fgErrorChar (holding 0) when
 *         offset is negative or >= fSize.
 */
inline UniChar&
UnicodeString::operator[](TextOffset offset)
{
    // Cast to unsigned in order to detect negative values with the same
    // comparison that checks the upper bound. Assume fSize >= 0.
    if ((t_uint32)offset < (t_uint32)fSize)
    {
        // The caller may write through the returned reference, so the cached
        // hash code can no longer be trusted.
        fHashCode = kInvalidHashCode;

        // Return the element itself. Going through a local reference that was
        // bound to fgErrorChar would merely copy the character into
        // fgErrorChar, and writes by the caller would never reach the string.
        return fChars[offset];
    }

    fgErrorChar = 0; // Always reset this to zero in case the caller has modified it
    return fgErrorChar;
}
|
||
|
|
||
|
//----------------------------------------------------
|
||
|
// Other inline methods
|
||
|
//----------------------------------------------------
|
||
|
|
||
|
/**
 * Removes all characters from this string. The logical length becomes 0 and
 * the bogus flag is cleared; the buffer itself is neither freed nor cleared.
 *
 * @return *this.
 */
inline UnicodeString&
UnicodeString::remove()
{
    fSize = 0;
    fBogus = FALSE;

    // The contents changed, so any cached hash code is stale. Mark it for
    // recomputation, as the other mutators in this file do.
    fHashCode = kInvalidHashCode;

    return *this;
}
|
||
|
|
||
|
/**
 * Reports the current length of the string.
 *
 * @return The value of fSize.
 */
inline t_int32
UnicodeString::size() const
{
    return this->fSize;
}
|
||
|
|
||
|
/**
 * Compares the full contents of this string with the full contents of
 * "that" by delegating to doCompare().
 *
 * @param that The string to compare against.
 * @return Whatever doCompare() returns for the two buffers and lengths.
 */
inline t_int8
UnicodeString::compare(const UnicodeString& that) const
{
    const t_int8 outcome = doCompare(fChars, fSize,
                                     that.fChars, that.fSize);
    return outcome;
}
|
||
|
|
||
|
/**
 * Equality test.
 *
 * @param that The string to compare against.
 * @return True when both strings have the same length and compare() yields 0.
 */
inline t_bool
UnicodeString::operator==(const UnicodeString& that) const
{
    // The length test comes first: when the lengths differ the || short-
    // circuits and compare() is never called.
    return !(fSize != that.fSize || compare(that) != 0);
}
|
||
|
|
||
|
|
||
|
/**
 * Inequality test. Defined as the exact negation of operator==() so that the
 * two operators can never disagree, and so that strings of different lengths
 * are rejected by operator==()'s length check without calling compare().
 *
 * @param that The string to compare against.
 * @return True when operator==() reports the strings as not equal.
 */
inline t_bool
UnicodeString::operator!=(const UnicodeString& that) const
{
    return !(*this == that);
}
|
||
|
|
||
|
/**
 * Greater-than test, expressed in terms of compare().
 *
 * @param that The string to compare against.
 * @return True only when compare(that) yields exactly 1.
 */
inline t_bool
UnicodeString::operator>(const UnicodeString& that) const
{
    const t_int8 order = compare(that);
    return order == 1;
}
|
||
|
|
||
|
/**
 * Less-than test, expressed in terms of compare().
 *
 * @param that The string to compare against.
 * @return True only when compare(that) yields exactly -1.
 */
inline t_bool
UnicodeString::operator<(const UnicodeString& that) const
{
    const t_int8 order = compare(that);
    return order == -1;
}
|
||
|
|
||
|
/**
 * Less-than-or-equal test, expressed in terms of compare().
 *
 * @param that The string to compare against.
 * @return True unless compare(that) yields exactly 1.
 */
inline t_bool
UnicodeString::operator<=(const UnicodeString& that) const
{
    const t_int8 order = compare(that);
    return !(order == 1);
}
|
||
|
|
||
|
/**
 * Greater-than-or-equal test, expressed in terms of compare().
 *
 * @param that The string to compare against.
 * @return True unless compare(that) yields exactly -1.
 */
inline t_bool
UnicodeString::operator>=(const UnicodeString& that) const
{
    const t_int8 order = compare(that);
    return !(order == -1);
}
|
||
|
|
||
|
/**
 * Reports whether this string is flagged as bogus.
 *
 * @return The current value of fBogus.
 */
inline t_bool
UnicodeString::isBogus() const
{
    return this->fBogus;
}
|
||
|
|
||
|
/**
|
||
|
* The arrayCopy() methods copy an array of UnicodeString OBJECTS (not
|
||
|
* pointers).
|
||
|
*/
|
||
|
inline void arrayCopy(const UnicodeString* src, UnicodeString* dst, t_int32 count)
|
||
|
{ while (count-- > 0) *dst++ = *src++; }
|
||
|
|
||
|
inline void arrayCopy(const UnicodeString* src, t_int32 srcStart, UnicodeString* dst, t_int32 dstStart, t_int32 count)
|
||
|
{ arrayCopy(src+srcStart, dst+dstStart, count); }
|
||
|
|
||
|
#endif
|