gecko-dev/js/src/jsstr.h

555 строки
18 KiB
C
Исходник Обычный вид История

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
1998-03-28 05:44:41 +03:00
*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
1998-03-28 05:44:41 +03:00
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
1998-03-28 05:44:41 +03:00
*
2001-09-20 04:02:59 +04:00
* The Original Code is Mozilla Communicator client code, released
* March 31, 1998.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
1998-03-28 05:44:41 +03:00
#ifndef jsstr_h___
#define jsstr_h___
1998-03-28 05:44:41 +03:00
#include <ctype.h>
2010-07-15 10:19:36 +04:00
#include "jsapi.h"
1998-03-28 05:44:41 +03:00
#include "jsprvtd.h"
#include "jshashtable.h"
#include "jslock.h"
#include "jsobj.h"
2010-07-15 10:19:36 +04:00
#include "jsvalue.h"
2010-09-24 21:54:39 +04:00
#include "jscell.h"
namespace js {

/* Implemented in jsstrinlines.h */
class StringBuffer;

/*
 * When an algorithm does not need a string represented as a single linear
 * array of characters, this range utility may be used to traverse the string
 * as a sequence of linear arrays of characters. This avoids flattening ropes.
 *
 * Implemented in jsstrinlines.h.
 */
class StringSegmentRange;
class MutatingRopeSegmentRange;

/*
 * Utility for building a rope (lazy concatenation) of strings.
 */
class RopeBuilder;

} /* namespace js */
2010-07-17 04:41:22 +04:00
/*
 * String primitives declared with the JS_FASTCALL calling convention. Each
 * takes the current context plus its string operand(s) and returns a JSString
 * pointer.
 * NOTE(review): presumably these return null on failure, like the other
 * allocating entry points declared in this header -- confirm in jsstr.cpp.
 */
extern JSString * JS_FASTCALL
js_ConcatStrings(JSContext *cx, JSString *s1, JSString *s2);
extern JSString * JS_FASTCALL
js_toLowerCase(JSContext *cx, JSString *str);
extern JSString * JS_FASTCALL
js_toUpperCase(JSContext *cx, JSString *str);
1998-03-28 05:44:41 +03:00
/*
 * A (length, pointer) pair describing a run of jschars.
 * NOTE(review): presumably a non-owning view whose chars need not be
 * NUL-terminated -- confirm against the users of JSSubString.
 */
struct JSSubString {
size_t length; /* presumably the number of jschars at chars -- TODO confirm */
const jschar *chars; /* start of the character run */
};
/* NOTE(review): presumably the shared empty jschar string and empty substring -- confirm. */
extern jschar js_empty_ucstr[];
extern JSSubString js_EmptySubString;
/*
 * Unicode character attribute lookup tables. JS_CCODE() chains them: js_X is
 * indexed by the character's high bits (c >> 6), js_Y by that entry combined
 * with the character's low six bits, and js_A supplies the final 32-bit
 * attribute word.
 */
extern const uint8 js_X[];
extern const uint8 js_Y[];
extern const uint32 js_A[];
1998-03-28 05:44:41 +03:00
/*
 * Enumerated Unicode general category types.
 *
 * JS_CTYPE() extracts one of these values from the low five bits of a
 * character's attribute word, and the classification macros test membership
 * with (1 << category) masks, so every enumerator must stay below 32.
 *
 * NOTE(review): the value 17 is not assigned to any enumerator; presumably
 * this mirrors the java.lang.Character numbering that the classification
 * macros are stated to be based on -- confirm before renumbering.
 */
typedef enum JSCharType {
JSCT_UNASSIGNED = 0,
JSCT_UPPERCASE_LETTER = 1,
JSCT_LOWERCASE_LETTER = 2,
JSCT_TITLECASE_LETTER = 3,
JSCT_MODIFIER_LETTER = 4,
JSCT_OTHER_LETTER = 5,
JSCT_NON_SPACING_MARK = 6,
JSCT_ENCLOSING_MARK = 7,
JSCT_COMBINING_SPACING_MARK = 8,
JSCT_DECIMAL_DIGIT_NUMBER = 9,
JSCT_LETTER_NUMBER = 10,
JSCT_OTHER_NUMBER = 11,
JSCT_SPACE_SEPARATOR = 12,
JSCT_LINE_SEPARATOR = 13,
JSCT_PARAGRAPH_SEPARATOR = 14,
JSCT_CONTROL = 15,
JSCT_FORMAT = 16,
JSCT_PRIVATE_USE = 18,
JSCT_SURROGATE = 19,
JSCT_DASH_PUNCTUATION = 20,
JSCT_START_PUNCTUATION = 21,
JSCT_END_PUNCTUATION = 22,
JSCT_CONNECTOR_PUNCTUATION = 23,
JSCT_OTHER_PUNCTUATION = 24,
JSCT_MATH_SYMBOL = 25,
JSCT_CURRENCY_SYMBOL = 26,
JSCT_MODIFIER_SYMBOL = 27,
JSCT_OTHER_SYMBOL = 28
} JSCharType;
/*
 * Character classifying and mapping macros, based on java.lang.Character.
 *
 * JS_CCODE(c) yields the 32-bit attribute word for c: js_X is indexed by the
 * high bits of c (after truncation to uint16), the result selects a 64-entry
 * group in js_Y that is indexed by the low six bits of c, and that entry in
 * turn indexes js_A.
 *
 * JS_CTYPE(c) is the low five bits of the attribute word, i.e. a JSCharType.
 *
 * Both macros evaluate c more than once, so c must have no side effects.
 */
#define JS_CCODE(c)     (js_A[js_Y[(js_X[(uint16)(c)>>6]<<6)|((c)&0x3F)]])
#define JS_CTYPE(c)     (JS_CCODE(c) & 0x1F)
/*
 * Evaluates to 1 when c's general category is one of the five letter
 * categories, else 0. The categories are OR-ed into a bit set that is then
 * shifted right by JS_CTYPE(c) so the matching bit lands in position 0.
 */
#define JS_ISALPHA(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER))                          \
                          >> JS_CTYPE(c)) & 1)
1998-03-28 05:44:41 +03:00
/*
 * Evaluates to 1 when c's general category is a letter category or
 * JSCT_DECIMAL_DIGIT_NUMBER, else 0 (same bit-set test as JS_ISALPHA).
 */
#define JS_ISALNUM(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER))                  \
                          >> JS_CTYPE(c)) & 1)
1998-03-28 05:44:41 +03:00
/*
 * A unicode letter, suitable for use in an identifier: any of the five letter
 * categories or JSCT_LETTER_NUMBER. Evaluates to 1 or 0.
 */
#define JS_ISLETTER(c)  ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_LETTER_NUMBER))                         \
                          >> JS_CTYPE(c)) & 1)
/*
 * 'IdentifierPart' from ECMA grammar, is Unicode letter or combining mark or
 * digit or connector punctuation. Evaluates to 1 or 0.
 */
#define JS_ISIDPART(c)  ((((1 << JSCT_UPPERCASE_LETTER) |                     \
                           (1 << JSCT_LOWERCASE_LETTER) |                     \
                           (1 << JSCT_TITLECASE_LETTER) |                     \
                           (1 << JSCT_MODIFIER_LETTER) |                      \
                           (1 << JSCT_OTHER_LETTER) |                         \
                           (1 << JSCT_LETTER_NUMBER) |                        \
                           (1 << JSCT_NON_SPACING_MARK) |                     \
                           (1 << JSCT_COMBINING_SPACING_MARK) |               \
                           (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                 \
                           (1 << JSCT_CONNECTOR_PUNCTUATION))                 \
                          >> JS_CTYPE(c)) & 1)
/*
 * Unicode control-format characters, ignored in input. Evaluates to 1 exactly
 * when JS_CTYPE(c) equals JSCT_FORMAT, using the same shift-and-mask form as
 * the other classification macros.
 */
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
/*
 * Per-character answer tables consulted by JS_ISIDSTART / JS_ISIDENT for
 * character codes below 128.
 */
extern const bool js_isidstart[];
extern const bool js_isident[];

/*
 * Can c begin an identifier? Codes below 128 are answered from the
 * js_isidstart table; everything else (including negative c, which wraps to a
 * large unsigned value) falls back to the JS_ISLETTER classification.
 */
static inline bool
JS_ISIDSTART(int c)
{
    const unsigned code = c;
    if (code >= 128)
        return JS_ISLETTER(c);
    return js_isidstart[code];
}
/*
 * Can c appear inside an identifier? Codes below 128 are answered from the
 * js_isident table; everything else falls back to JS_ISIDPART.
 */
static inline bool
JS_ISIDENT(int c)
{
    const unsigned code = c;
    if (code >= 128)
        return JS_ISIDPART(c);
    return js_isident[code];
}
/*
 * XML character classes.
 *
 * JS_ISXMLSPACE      - space, tab, carriage return or line feed.
 * JS_ISXMLNSSTART    - attribute-word bit 0x00000100 is set, or c is '_'.
 * JS_ISXMLNS         - attribute-word bit 0x00000080 is set, or c is one of
 *                      '.', '-', '_'.
 * JS_ISXMLNAMESTART / JS_ISXMLNAME - the corresponding NS test, additionally
 *                      accepting ':'.
 *
 * All of these evaluate c more than once.
 */
#define JS_ISXMLSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || \
(c) == '\n')
#define JS_ISXMLNSSTART(c) ((JS_CCODE(c) & 0x00000100) || (c) == '_')
#define JS_ISXMLNS(c) ((JS_CCODE(c) & 0x00000080) || (c) == '.' || \
(c) == '-' || (c) == '_')
#define JS_ISXMLNAMESTART(c) (JS_ISXMLNSSTART(c) || (c) == ':')
#define JS_ISXMLNAME(c) (JS_ISXMLNS(c) || (c) == ':')
1998-03-28 05:44:41 +03:00
/* True when c's general category is JSCT_DECIMAL_DIGIT_NUMBER. */
#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
/*
 * Code units given special treatment by the white-space predicates below.
 * BYTE_ORDER_MARK2 is the byte-swapped form of BYTE_ORDER_MARK and is only
 * accepted by JS_ISSPACE_OR_BOM.
 */
const jschar BYTE_ORDER_MARK = 0xFEFF;
const jschar BYTE_ORDER_MARK2 = 0xFFFE;
const jschar NO_BREAK_SPACE = 0x00A0;
/* White-space answers for character codes below 128, indexed by code. */
extern const bool js_isspace[];
/*
 * Return true if c is white space. Codes below 128 are answered from the
 * js_isspace table. Above that, NO_BREAK_SPACE and BYTE_ORDER_MARK are
 * accepted explicitly, as is any character whose attribute word has the value
 * 4 in bits 16-18 (NOTE(review): presumably the table's white-space class --
 * confirm against the table generator).
 *
 * c is converted to unsigned first, so a negative c takes the non-ASCII path
 * instead of indexing js_isspace out of bounds.
 */
static inline bool
JS_ISSPACE(int c)
{
    unsigned w = c;
    if (w < 128)
        return js_isspace[w];
    return w == NO_BREAK_SPACE || w == BYTE_ORDER_MARK ||
           (JS_CCODE(w) & 0x00070000) == 0x00040000;
}
/*
 * Same classification as JS_ISSPACE, except that the byte-swapped BOM
 * (BYTE_ORDER_MARK2) is accepted too: little- and big-endian BOMs are both
 * treated as whitespace for compatibility.
 */
static inline bool
JS_ISSPACE_OR_BOM(int c)
{
    const unsigned code = c;
    if (code < 128)
        return js_isspace[code];
    if (code == NO_BREAK_SPACE || code == BYTE_ORDER_MARK || code == BYTE_ORDER_MARK2)
        return true;
    return (JS_CCODE(code) & 0x00070000) == 0x00040000;
}
1998-03-28 05:44:41 +03:00
/*
 * True for printable 7-bit ASCII. The mask test rejects every value outside
 * [0, 127] -- negative values included -- before isprint() sees it, because
 * passing isprint() a negative value other than EOF is undefined behavior.
 * c is evaluated twice.
 */
#define JS_ISPRINT(c)   (((c) & ~0x7F) == 0 && isprint(c))
/* Category tests: exactly JSCT_UPPERCASE_LETTER / JSCT_LOWERCASE_LETTER. */
#define JS_ISUPPER(c) (JS_CTYPE(c) == JSCT_UPPERCASE_LETTER)
#define JS_ISLOWER(c) (JS_CTYPE(c) == JSCT_LOWERCASE_LETTER)
/*
 * Case mapping. When the attribute word has bit 0x00100000 set, JS_TOUPPER
 * subtracts the signed offset stored in bits 22 and up (extracted with a
 * right shift of the word cast to int32); otherwise c is returned unchanged.
 * The result is cast to jschar. c is evaluated more than once.
 */
#define JS_TOUPPER(c) ((jschar) ((JS_CCODE(c) & 0x00100000) \
? (c) - ((int32)JS_CCODE(c) >> 22) \
: (c)))
/*
 * Mirror image of JS_TOUPPER: guarded by bit 0x00200000 and adding the same
 * signed offset instead of subtracting it.
 */
#define JS_TOLOWER(c) ((jschar) ((JS_CCODE(c) & 0x00200000) \
? (c) + ((int32)JS_CCODE(c) >> 22) \
: (c)))
1998-03-28 05:44:41 +03:00
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit for performance; MSVC doesn't do this for us.
 *
 * JS7_ISHEX and JS7_ISLET mask-test that c lies in [0, 127] before calling
 * into <ctype.h>, so negative values are rejected rather than handed to
 * isxdigit()/isalpha(), where they would be undefined behavior.
 *
 * JS7_UNDEC and JS7_UNHEX do no validation: they yield a digit value only
 * when c is a decimal (resp. hexadecimal) digit.
 *
 * Every macro except JS7_ISDEC and JS7_UNDEC evaluates c more than once.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISHEX(c)    (((c) & ~0x7F) == 0 && isxdigit(c))
#define JS7_UNHEX(c)    ((uintN)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    (((c) & ~0x7F) == 0 && isalpha(c))
/* Initialize the String class, returning its prototype object. */
2010-07-15 10:19:36 +04:00
extern js::Class js_StringClass;
inline bool
JSObject::isString() const
{
return getClass() == &js_StringClass;
}
1998-03-28 05:44:41 +03:00
extern JSObject *
js_InitStringClass(JSContext *cx, JSObject *obj);
/*
 * Shared C-string constants.
 * NOTE(review): presumably the names of the correspondingly named global
 * functions ("escape", "unescape", "uneval", "decodeURI", ...) -- confirm
 * against their definitions.
 */
extern const char js_escape_str[];
extern const char js_unescape_str[];
extern const char js_uneval_str[];
extern const char js_decodeURI_str[];
extern const char js_encodeURI_str[];
extern const char js_decodeURIComponent_str[];
extern const char js_encodeURIComponent_str[];
1998-03-28 05:44:41 +03:00
/* GC-allocate a string descriptor for the given malloc-allocated chars. */
extern JSFixedString *
js_NewString(JSContext *cx, jschar *chars, size_t length);
1998-03-28 05:44:41 +03:00
extern JSLinearString *
js_NewDependentString(JSContext *cx, JSString *base, size_t start, size_t length);
1998-03-28 05:44:41 +03:00
/* Copy a counted string and GC-allocate a descriptor for it. */
extern JSFixedString *
js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n);
1998-03-28 05:44:41 +03:00
extern JSFixedString *
js_NewStringCopyN(JSContext *cx, const char *s, size_t n);
1998-03-28 05:44:41 +03:00
/* Copy a C string and GC-allocate a descriptor for it. */
extern JSFixedString *
js_NewStringCopyZ(JSContext *cx, const jschar *s);
1998-03-28 05:44:41 +03:00
extern JSFixedString *
js_NewStringCopyZ(JSContext *cx, const char *s);
/*
* Convert a value to a printable C string.
*/
extern const char *
js_ValueToPrintable(JSContext *cx, const js::Value &,
JSAutoByteString *bytes, bool asSource = false);
1998-03-28 05:44:41 +03:00
/*
* Convert a value to a string, returning null after reporting an error,
* otherwise returning a new string reference.
*/
2010-07-15 10:19:36 +04:00
extern JSString *
js_ValueToString(JSContext *cx, const js::Value &v);
1998-03-28 05:44:41 +03:00
namespace js {

/*
 * Inline fast path in front of js_ValueToString: a value that already is a
 * string is handed back directly, anything else goes through the out-of-line
 * conversion. It lives here rather than inside js_ValueToString because most
 * callers of js_ValueToString already know that their value is (probably) not
 * a string.
 */
static JS_ALWAYS_INLINE JSString *
ValueToString_TestForStringInline(JSContext *cx, const Value &v)
{
    return v.isString() ? v.toString() : js_ValueToString(cx, v);
}

/*
 * Implements E-262-3 section 9.8, toString: the given value is converted to
 * jschars that are appended to the given buffer. If an error occurs, partial
 * results may already have been appended to the buffer.
 */
inline bool
ValueToStringBuffer(JSContext *cx, const Value &v, StringBuffer &sb);

} /* namespace js */
/*
 * Convert a value to its source expression, returning null after reporting
 * an error, otherwise returning a new string reference.
 */
extern JS_FRIEND_API(JSString *)
js_ValueToSource(JSContext *cx, const js::Value &v);
namespace js {
1998-03-28 05:44:41 +03:00
/*
* Compute a hash function from str. The caller can call this function even if
* str is not a GC-allocated thing.
1998-03-28 05:44:41 +03:00
*/
inline uint32
HashChars(const jschar *chars, size_t length)
{
uint32 h = 0;
for (; length; chars++, length--)
h = JS_ROTATE_LEFT32(h, 4) ^ *chars;
return h;
}
/*
* Test if strings are equal. The caller can call the function even if str1
* or str2 are not GC-allocated things.
*/
extern bool
EqualStrings(JSContext *cx, JSString *str1, JSString *str2, JSBool *result);
/* EqualStrings is infallible on linear strings. */
extern bool
EqualStrings(JSLinearString *str1, JSLinearString *str2);
1998-03-28 05:44:41 +03:00
/*
* Return less than, equal to, or greater than zero depending on whether
* str1 is less than, equal to, or greater than str2.
*/
extern bool
CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32 *result);
/*
* Return true if the string matches the given sequence of ASCII bytes.
*/
extern bool
StringEqualsAscii(JSLinearString *str, const char *asciiBytes);
} /* namespacejs */
1998-03-28 05:44:41 +03:00
/*
 * jschar counterparts of the C string routines.
 * NOTE(review): presumably js_strlen counts up to a terminating 0 and
 * js_strchr returns null when c is absent, as their C namesakes do -- confirm.
 */
extern size_t
js_strlen(const jschar *s);
extern jschar *
js_strchr(const jschar *s, jschar c);
/* NOTE(review): presumably like js_strchr but never searching at or past limit -- confirm. */
extern jschar *
js_strchr_limit(const jschar *s, jschar c, const jschar *limit);
/*
 * Copies exactly n jschars from s to t with memcpy. Unlike strncpy it neither
 * stops at a 0 character nor pads the destination.
 */
#define js_strncpy(t, s, n) memcpy((t), (s), (n) * sizeof(jschar))
1998-03-28 05:44:41 +03:00
/*
 * Advance s over leading Unicode white space (as classified by JS_ISSPACE)
 * and return the new position, which is either the first non-space character
 * or end if the whole range was white space. s must not be past end.
 */
static inline const jschar *
js_SkipWhiteSpace(const jschar *s, const jschar *end)
{
    JS_ASSERT(s <= end);
    for (; s != end; s++) {
        if (!JS_ISSPACE(*s))
            break;
    }
    return s;
}
namespace js {
/*
* On encodings:
*
* - Some string functions have an optional FlationCoding argument that allow
* the caller to force CESU-8 encoding handling.
* - Functions that don't take a FlationCoding base their NormalEncoding
* behavior on the js_CStringsAreUTF8 value. NormalEncoding is either raw
* (simple zero-extension) or UTF-8 depending on js_CStringsAreUTF8.
* - Functions that explicitly state their encoding do not use the
* js_CStringsAreUTF8 value.
*
* CESU-8 (Compatibility Encoding Scheme for UTF-16: 8-bit) is a variant of
* UTF-8 that allows us to store any wide character string as a narrow
* character string. For strings containing mostly ascii, it saves space.
* http://www.unicode.org/reports/tr26/
*/
enum FlationCoding
{
NormalEncoding,
CESU8Encoding
};
1998-03-28 05:44:41 +03:00
/*
* Inflate bytes to jschars. Return null on error, otherwise return the jschar
* or byte vector that was malloc'ed. length is updated to the length of the
* new string (in jschars).
1998-03-28 05:44:41 +03:00
*/
extern jschar *
InflateString(JSContext *cx, const char *bytes, size_t *length,
FlationCoding fc = NormalEncoding);
1998-03-28 05:44:41 +03:00
extern char *
DeflateString(JSContext *cx, const jschar *chars, size_t length);
1998-03-28 05:44:41 +03:00
/*
* Inflate bytes to JS chars in an existing buffer. 'chars' must be large
* enough for 'length' jschars. The buffer is NOT null-terminated.
*
* charsLength must be be initialized with the destination buffer size and, on
* return, will contain on return the number of copied chars.
*/
extern bool
InflateStringToBuffer(JSContext *cx, const char *bytes, size_t length,
jschar *chars, size_t *charsLength);
extern bool
InflateUTF8StringToBuffer(JSContext *cx, const char *bytes, size_t length,
jschar *chars, size_t *charsLength,
FlationCoding fc = NormalEncoding);
/* Get number of bytes in the deflated sequence of characters. */
extern size_t
GetDeflatedStringLength(JSContext *cx, const jschar *chars, size_t charsLength);
/* This function will never fail (return -1) in CESU-8 mode. */
extern size_t
GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
size_t charsLength,
FlationCoding fc = NormalEncoding);
/*
* Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
* 'length chars. The buffer is NOT null-terminated. The destination length
* must to be initialized with the buffer size and will contain on return the
* number of copied bytes. Conversion behavior depends on js_CStringsAreUTF8.
*/
extern bool
DeflateStringToBuffer(JSContext *cx, const jschar *chars,
size_t charsLength, char *bytes, size_t *length);
/*
* Same as DeflateStringToBuffer, but treats 'bytes' as UTF-8 or CESU-8.
*/
extern bool
DeflateStringToUTF8Buffer(JSContext *cx, const jschar *chars,
size_t charsLength, char *bytes, size_t *length,
FlationCoding fc = NormalEncoding);
} /* namespace js */
/*
 * The String.prototype.replace fast-native entry point is exported for joined
 * function optimization in js{interp,tracer}.cpp.
 */
namespace js {
extern JSBool
str_replace(JSContext *cx, uintN argc, js::Value *vp);
}

/*
 * Further string natives exported with the same (cx, argc, vp) signature.
 * NOTE(review): presumably the implementations of String.prototype.toString,
 * charAt and charCodeAt respectively -- confirm in jsstr.cpp.
 */
extern JSBool
js_str_toString(JSContext *cx, uintN argc, js::Value *vp);

extern JSBool
js_str_charAt(JSContext *cx, uintN argc, js::Value *vp);

extern JSBool
js_str_charCodeAt(JSContext *cx, uintN argc, js::Value *vp);
/*
 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
 * least 6 bytes long. Return the number of UTF-8 bytes of data written.
 *
 * utf8Buffer receives the encoded bytes; ucs4Char is the code point to encode.
 * NOTE(review): whether a terminating NUL is written is not visible from this
 * declaration -- confirm before treating the buffer as a C string.
 */
extern int
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
namespace js {

/*
 * Common worker behind the two inline wrappers below: PutEscapedString passes
 * a buffer and a null FILE, FileEscapedString passes a FILE and no buffer.
 * A result of size_t(-1) signals failure.
 */
extern size_t
PutEscapedStringImpl(char *buffer, size_t size, FILE *fp, JSLinearString *str, uint32 quote);

/*
 * Escape str into buffer, using JS string-literal \escapes for every
 * non-printable or non-ASCII character.
 *
 * Unless size is 0, the buffer is guaranteed to end in a NUL. The return value
 * is the length of the escaped output, NOT counting that NUL, so a result of
 * size or more means the output was truncated. With a null buffer only the
 * output length is computed. A non-zero quote must be a single or double
 * quote character; it is used to quote the output.
 */
inline size_t
PutEscapedString(char *buffer, size_t size, JSLinearString *str, uint32 quote)
{
    const size_t written = PutEscapedStringImpl(buffer, size, NULL, str, quote);

    /* PutEscapedStringImpl can only fail with a file. */
    JS_ASSERT(written != size_t(-1));
    return written;
}

/*
 * Escape str into the file fp, escaping every non-printable or non-ASCII
 * character. A non-zero quote must be a single or double quote character; it
 * is used to quote the output. Returns false if the underlying writer
 * reported failure.
 */
inline bool
FileEscapedString(FILE *fp, JSLinearString *str, uint32 quote)
{
    const size_t status = PutEscapedStringImpl(NULL, 0, fp, str, quote);
    return status != size_t(-1);
}

} /* namespace js */
/*
 * NOTE(review): presumably the native behind the String constructor/function
 * (same (cx, argc, vp) signature as the other string natives declared in this
 * header) -- confirm in jsstr.cpp.
 */
extern JSBool
js_String(JSContext *cx, uintN argc, js::Value *vp);
1998-03-28 05:44:41 +03:00
#endif /* jsstr_h___ */