bug 232182: Allow SpiderMonkey to deal with UTF-8 strings (controlled by a define). Patch from Michael Daumling <daumling@adobe.com>. r=shaver

This commit is contained in:
mrbkap%gmail.com 2005-11-05 01:06:30 +00:00
Родитель d576bf2833
Коммит 809abae6c7
13 изменённых файлов: 456 добавлений и 109 удалений

Просмотреть файл

@ -1564,6 +1564,48 @@ ToInt32(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
return JS_NewNumberValue(cx, i, rval);
}
/*
 * Shell native: report whether the engine was built with UTF-8 strings.
 * Stores JSVAL_TRUE or JSVAL_FALSE in *rval according to JS_StringsAreUTF8()
 * and always succeeds. The call arguments are not examined.
 */
static JSBool
StringsAreUtf8(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    if (JS_StringsAreUTF8())
        *rval = JSVAL_TRUE;
    else
        *rval = JSVAL_FALSE;
    return JS_TRUE;
}
static const char* badUtf8 = "...\xC0...";
static const char* bigUtf8 = "...\xFB\xBF\xBF\xBF\xBF...";
static const jschar badSurrogate[] = { 'A', 'B', 'C', 0xDEEE, 'D', 'E', 0 };
/*
 * Shell native: exercise the string conversion error paths.
 *
 * The optional first argument selects the test mode (default 1):
 *   1: create a string from a malformed UTF-8 byte sequence
 *   2: create a string from an over-large UTF-8 character
 *   3: encode jschars containing a lone low surrogate
 *   4: decode 10 bytes into a destination that only has room for 5 jschars
 * Modes 1 to 3 throw an error only if compiled with UTF-8 support. Any other
 * mode value performs no conversion.
 *
 * Returns JS_FALSE when the mode argument cannot be converted to an integer
 * or when an exception is pending after the selected conversion; JS_TRUE
 * otherwise. *rval is not written.
 */
static JSBool
TestUtf8(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    intN mode = 1;
    jschar chars[20];
    size_t charsLength = 5;
    char bytes[20];
    size_t bytesLength = 20;

    /* Do not silently fall back to mode 1 when the argument is unusable. */
    if (argc && !JS_ValueToInt32(cx, *argv, &mode))
        return JS_FALSE;

    switch (mode) {
      /* mode 1: malformed UTF-8 string (throws error if compiled with UTF-8) */
      case 1:
        JS_NewStringCopyZ(cx, badUtf8);
        break;
      /* mode 2: big UTF-8 character (throws error if compiled with UTF-8) */
      case 2:
        JS_NewStringCopyZ(cx, bigUtf8);
        break;
      /* mode 3: bad Unicode surrogate character (throws error if compiled with UTF-8) */
      case 3:
        JS_EncodeCharacters(cx, badSurrogate, 6, bytes, &bytesLength);
        break;
      /* mode 4: use a too small buffer */
      case 4:
        JS_DecodeBytes(cx, "1234567890", 10, chars, &charsLength);
        break;
      /* unknown modes are a no-op */
      default:
        break;
    }
    return !JS_IsExceptionPending(cx);
}
static JSFunctionSpec shell_functions[] = {
{"version", Version, 0},
{"options", Options, 0},
@ -1577,6 +1619,8 @@ static JSFunctionSpec shell_functions[] = {
{"untrap", Untrap, 2},
{"line2pc", LineToPC, 0},
{"pc2line", PCToLine, 0},
{"stringsAreUtf8", StringsAreUtf8, 0},
{"testUtf8", TestUtf8, 1},
#ifdef DEBUG
{"dis", Disassemble, 1},
{"dissrc", DisassWithSrc, 1},
@ -1616,6 +1660,8 @@ static char *shell_help_messages[] = {
"untrap(fun[, pc]) Remove a trap",
"line2pc([fun,] line) Map line number to PC",
"pc2line(fun[, pc]) Map PC to line number",
"stringsAreUTF8() Check if strings are UTF-8 encoded",
"testUTF8(mode) Perform UTF-8 tests (modes are 1 to 4)",
#ifdef DEBUG
"dis([fun]) Disassemble functions into bytecodes",
"dissrc([fun]) Disassemble functions with source lines",
@ -1643,14 +1689,14 @@ static char *shell_help_messages[] = {
static void
ShowHelpHeader(void)
{
fprintf(gOutFile, "%-9s %-22s %s\n", "Command", "Usage", "Description");
fprintf(gOutFile, "%-9s %-22s %s\n", "=======", "=====", "===========");
fprintf(gOutFile, "%-14s %-22s %s\n", "Command", "Usage", "Description");
fprintf(gOutFile, "%-14s %-22s %s\n", "=======", "=====", "===========");
}
static void
ShowHelpForCommand(uintN n)
{
fprintf(gOutFile, "%-9.9s %s\n", shell_functions[n].name, shell_help_messages[n]);
fprintf(gOutFile, "%-14.14s %s\n", shell_functions[n].name, shell_help_messages[n]);
}
static JSBool

Просмотреть файл

@ -281,3 +281,7 @@ MSG_DEF(JSMSG_DUPLICATE_XML_ATTR, 198, 1, JSEXN_TYPEERR, "duplicate XML attr
MSG_DEF(JSMSG_TOO_MANY_FUN_VARS, 199, 0, JSEXN_SYNTAXERR, "too many local variables")
MSG_DEF(JSMSG_ARRAY_INIT_TOO_BIG, 200, 0, JSEXN_INTERNALERR, "array initialiser too large")
MSG_DEF(JSMSG_REGEXP_TOO_COMPLEX, 201, 0, JSEXN_INTERNALERR, "regular expression too complex")
MSG_DEF(JSMSG_BUFFER_TOO_SMALL, 202, 0, JSEXN_INTERNALERR, "buffer too small")
MSG_DEF(JSMSG_BAD_SURROGATE_CHAR, 203, 1, JSEXN_TYPEERR, "bad surrogate character {0}")
MSG_DEF(JSMSG_UTF8_CHAR_TOO_LARGE, 204, 1, JSEXN_TYPEERR, "UTF-8 character {0} too large")
MSG_DEF(JSMSG_MALFORMED_UTF8_CHAR, 205, 1, JSEXN_TYPEERR, "malformed UTF-8 character sequence at offset {0}")

Просмотреть файл

@ -3606,7 +3606,7 @@ JS_CompileScript(JSContext *cx, JSObject *obj,
JSScript *script;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return NULL;
script = JS_CompileUCScript(cx, obj, chars, length, filename, lineno);
@ -3624,7 +3624,7 @@ JS_CompileScriptForPrincipals(JSContext *cx, JSObject *obj,
JSScript *script;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return NULL;
script = JS_CompileUCScriptForPrincipals(cx, obj, principals,
@ -3693,7 +3693,7 @@ JS_BufferIsCompilableUnit(JSContext *cx, JSObject *obj,
JSErrorReporter older;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return JS_TRUE;
@ -3816,7 +3816,7 @@ JS_CompileFunction(JSContext *cx, JSObject *obj, const char *name,
JSFunction *fun;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return NULL;
fun = JS_CompileUCFunction(cx, obj, name, nargs, argnames, chars, length,
@ -3836,7 +3836,7 @@ JS_CompileFunctionForPrincipals(JSContext *cx, JSObject *obj,
JSFunction *fun;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return NULL;
fun = JS_CompileUCFunctionForPrincipals(cx, obj, principals, name,
@ -4041,7 +4041,7 @@ JS_EvaluateScript(JSContext *cx, JSObject *obj,
JSBool ok;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return JS_FALSE;
ok = JS_EvaluateUCScript(cx, obj, chars, length, filename, lineno, rval);
@ -4060,7 +4060,7 @@ JS_EvaluateScriptForPrincipals(JSContext *cx, JSObject *obj,
JSBool ok;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return JS_FALSE;
ok = JS_EvaluateUCScriptForPrincipals(cx, obj, principals, chars, length,
@ -4210,15 +4210,16 @@ JS_NewString(JSContext *cx, char *bytes, size_t length)
{
jschar *chars;
JSString *str;
size_t charsLength = length;
CHECK_REQUEST(cx);
/* Make a Unicode vector from the 8-bit char codes in bytes. */
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &charsLength);
if (!chars)
return NULL;
/* Free chars (but not bytes, which caller frees on error) if we fail. */
str = js_NewString(cx, chars, length, 0);
str = js_NewString(cx, chars, charsLength, 0);
if (!str) {
JS_free(cx, chars);
return NULL;
@ -4237,7 +4238,7 @@ JS_NewStringCopyN(JSContext *cx, const char *s, size_t n)
JSString *str;
CHECK_REQUEST(cx);
js = js_InflateString(cx, s, n);
js = js_InflateString(cx, s, &n);
if (!js)
return NULL;
str = js_NewString(cx, js, n, 0);
@ -4257,7 +4258,7 @@ JS_NewStringCopyZ(JSContext *cx, const char *s)
if (!s)
return cx->runtime->emptyString;
n = strlen(s);
js = js_InflateString(cx, s, n);
js = js_InflateString(cx, s, &n);
if (!js)
return NULL;
str = js_NewString(cx, js, n, 0);
@ -4401,6 +4402,28 @@ JS_MakeStringImmutable(JSContext *cx, JSString *str)
return JS_TRUE;
}
/*
 * Public entry point for jschar -> byte conversion into a caller-supplied
 * buffer. All arguments are forwarded unchanged to js_DeflateStringToBuffer,
 * whose result is returned.
 */
JS_PUBLIC_API(JSBool)
JS_EncodeCharacters(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenP)
{
    JSBool ok;

    ok = js_DeflateStringToBuffer(cx, src, srclen, dst, dstlenP);
    return ok;
}
/*
 * Public entry point for byte -> jschar conversion into a caller-supplied
 * buffer. All arguments are forwarded unchanged to js_InflateStringToBuffer,
 * whose result is returned.
 */
JS_PUBLIC_API(JSBool)
JS_DecodeBytes(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenP)
{
    JSBool ok;

    ok = js_InflateStringToBuffer(cx, src, srclen, dst, dstlenP);
    return ok;
}
/*
 * Return JS_TRUE if this build was compiled with JS_STRINGS_ARE_UTF8 defined,
 * JS_FALSE otherwise. The answer is fixed at compile time.
 */
JS_PUBLIC_API(JSBool)
JS_StringsAreUTF8(void)
{
#ifdef JS_STRINGS_ARE_UTF8
    return JS_TRUE;
#else
    return JS_FALSE;
#endif
}
/************************************************************************/
JS_PUBLIC_API(void)
@ -4508,7 +4531,7 @@ JS_NewRegExpObject(JSContext *cx, char *bytes, size_t length, uintN flags)
JSObject *obj;
CHECK_REQUEST(cx);
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return NULL;
obj = js_NewRegExpObject(cx, NULL, chars, length, flags);

Просмотреть файл

@ -1812,6 +1812,36 @@ JS_UndependString(JSContext *cx, JSString *str);
extern JS_PUBLIC_API(JSBool)
JS_MakeStringImmutable(JSContext *cx, JSString *str);
/*
* Return JS_TRUE if all strings are UTF-8. The source must be compiled
* with JS_STRINGS_ARE_UTF8 defined to get UTF-8 support.
*/
JS_PUBLIC_API(JSBool)
JS_StringsAreUTF8();
/*
* Character encoding support
* For both JS_EncodeCharacters and JS_DecodeBytes, set *dstlenp to the size
* of the destination buffer before the call; on return, it contains the number of
* bytes (JS_EncodeCharacters) or jschars (JS_DecodeBytes) actually stored.
* To determine the necessary destination buffer size, set the destination buffer
* pointer to NULL. On errors, the functions report an error. In that case,
* *dstlenp contains the number of characters or bytes transferred so far.
* If cx is NULL, no error is reported on a failure, and the functions just
* return JS_FALSE. Both functions do NOT store an additional 0-byte
* (or 0-jschar) behind the string.
* If the source has been compiled with the #define JS_STRINGS_ARE_UTF8 to
* enable UTF-8 support, JS_EncodeCharacters() encodes to UTF-8, and
* JS_DecodeBytes() decodes from UTF-8, which may create additional
* errors if the character sequence is malformed. If UTF-8 support is
* disabled, the functions expand bytes to jschars and vice versa.
*/
JS_PUBLIC_API(JSBool)
JS_EncodeCharacters(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenp);
JS_PUBLIC_API(JSBool)
JS_DecodeBytes(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenp);
/************************************************************************/
/*

Просмотреть файл

@ -719,12 +719,15 @@ js_Atomize(JSContext *cx, const char *bytes, size_t length, uintN flags)
*/
#define ATOMIZE_BUF_MAX 32
jschar inflated[ATOMIZE_BUF_MAX];
size_t inflatedLength = ATOMIZE_BUF_MAX - 1;
if (length < ATOMIZE_BUF_MAX) {
js_InflateStringToBuffer(inflated, bytes, length);
js_InflateStringToBuffer(cx, bytes, length, inflated, &inflatedLength);
inflated[inflatedLength] = 0;
chars = inflated;
} else {
chars = js_InflateString(cx, bytes, length);
inflatedLength = length;
chars = js_InflateString(cx, bytes, &inflatedLength);
if (!chars)
return NULL;
flags |= ATOM_NOCOPY;
@ -733,7 +736,7 @@ js_Atomize(JSContext *cx, const char *bytes, size_t length, uintN flags)
str = ALIGN(buf, JSString);
str->chars = chars;
str->length = length;
str->length = inflatedLength;
atom = js_AtomizeString(cx, str, ATOM_TMPSTR | flags);
if (chars != inflated && (!atom || ATOM_TO_STRING(atom)->chars != chars))
JS_free(cx, chars);

Просмотреть файл

@ -843,8 +843,9 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
for (i = 0; i < argCount; i++) {
if (charArgs) {
char *charArg = va_arg(ap, char *);
size_t charArgLength = strlen(charArg);
reportp->messageArgs[i]
= js_InflateString(cx, charArg, strlen(charArg));
= js_InflateString(cx, charArg, &charArgLength);
if (!reportp->messageArgs[i])
goto error;
}
@ -862,12 +863,16 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
*/
if (argCount > 0) {
if (efs->format) {
const char *fmt;
jschar *buffer, *fmt, *out;
const jschar *arg;
jschar *out;
int expandedArgs = 0;
size_t expandedLength
= strlen(efs->format)
size_t expandedLength;
size_t len = strlen (efs->format);
buffer = fmt = js_InflateString (cx, efs->format, &len);
if (!buffer)
goto error;
expandedLength
= len
- (3 * argCount) /* exclude the {n} */
+ totalArgsLength;
/*
@ -876,9 +881,10 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
*/
reportp->ucmessage = out = (jschar *)
JS_malloc(cx, (expandedLength + 1) * sizeof(jschar));
if (!out)
if (!out) {
JS_free (cx, buffer);
goto error;
fmt = efs->format;
}
while (*fmt) {
if (*fmt == '{') {
if (isdigit(fmt[1])) {
@ -892,13 +898,11 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
continue;
}
}
/*
* is this kosher?
*/
*out++ = (unsigned char)(*fmt++);
*out++ = *fmt++;
}
JS_ASSERT(expandedArgs == argCount);
*out = 0;
JS_free (cx, buffer);
*messagep =
js_DeflateString(cx, reportp->ucmessage,
(size_t)(out - reportp->ucmessage));
@ -911,11 +915,13 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
* entire message.
*/
if (efs->format) {
size_t len;
*messagep = JS_strdup(cx, efs->format);
if (!*messagep)
goto error;
len = strlen(*messagep);
reportp->ucmessage
= js_InflateString(cx, *messagep, strlen(*messagep));
= js_InflateString(cx, *messagep, &len);
if (!reportp->ucmessage)
goto error;
}

Просмотреть файл

@ -940,7 +940,7 @@ JS_EvaluateInStackFrame(JSContext *cx, JSStackFrame *fp,
jschar *chars;
JSBool ok;
chars = js_InflateString(cx, bytes, length);
chars = js_InflateString(cx, bytes, &length);
if (!chars)
return JS_FALSE;
ok = JS_EvaluateUCInStackFrame(cx, fp, chars, length, filename, lineno,

Просмотреть файл

@ -568,7 +568,7 @@ js_EnterSharpObject(JSContext *cx, JSObject *obj, JSIdArray **idap,
len = JS_snprintf(buf, sizeof buf, "#%u%c",
sharpid >> SHARP_ID_SHIFT,
(sharpid & SHARP_BIT) ? '#' : '=');
*sp = js_InflateString(cx, buf, len);
*sp = js_InflateString(cx, buf, &len);
if (!*sp) {
if (ida)
JS_DestroyIdArray(cx, ida);

Просмотреть файл

@ -422,8 +422,15 @@ QuoteString(Sprinter *sp, JSString *str, jschar quote)
/* Use js_EscapeMap, \u, or \x only if necessary. */
if ((u = js_strchr(js_EscapeMap, c)) != NULL)
ok = Sprint(sp, "\\%c", (char)u[1]) >= 0;
else
else {
#ifdef JS_STRINGS_ARE_UTF8
/* print as UTF-8 string */
ok = Sprint(sp, "%hc", c) >= 0;
#else
/* Use \uxxxx or \xXX if the string cannot be displayed as UTF-8 */
ok = Sprint(sp, (c >> 8) ? "\\u%04X" : "\\x%02X", c) >= 0;
#endif
}
if (!ok)
return NULL;
}

Просмотреть файл

@ -1336,9 +1336,11 @@ retry:
kw = ATOM_KEYWORD(atom);
if (kw->tokentype == TOK_RESERVED) {
char buf[MAX_KEYWORD_LENGTH + 1];
js_DeflateStringToBuffer(buf, TOKENBUF_BASE(),
TOKENBUF_LENGTH());
size_t buflen = sizeof(buf) - 1;
if (!js_DeflateStringToBuffer(cx, TOKENBUF_BASE(), TOKENBUF_LENGTH(),
buf, &buflen))
goto error;
buf [buflen] = 0;
if (!js_ReportCompileErrorNumber(cx, ts,
JSREPORT_TS |
JSREPORT_WARNING |

Просмотреть файл

@ -267,9 +267,6 @@ static JSBool
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
jsval *rval);
static int
OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
static uint32
Utf8ToOneUcs4Char(const uint8 *utf8Buffer, int utf8Length);
@ -2880,44 +2877,251 @@ js_SkipWhiteSpace(const jschar *s)
return s;
}
#define INFLATE_STRING_BODY \
for (i = 0; i < length; i++) \
chars[i] = (unsigned char) bytes[i]; \
chars[i] = 0;
void
js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length)
{
size_t i;
INFLATE_STRING_BODY
}
#ifdef JS_STRINGS_ARE_UTF8
jschar *
js_InflateString(JSContext *cx, const char *bytes, size_t length)
js_InflateString(JSContext *cx, const char *bytes, size_t *length)
{
jschar *chars;
size_t i;
jschar *chars = NULL;
size_t dstlen = 0;
chars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
if (!js_InflateStringToBuffer(cx, bytes, *length, NULL, &dstlen))
return NULL;
chars = (jschar *) JS_malloc(cx, (dstlen + 1) * sizeof (jschar));
if (!chars)
return NULL;
INFLATE_STRING_BODY
js_InflateStringToBuffer(cx, bytes, *length, chars, &dstlen);
chars [dstlen] = 0;
*length = dstlen;
return chars;
}
#define DEFLATE_STRING_BODY \
for (i = 0; i < length; i++) \
bytes[i] = (char) chars[i]; \
bytes[i] = 0;
/*
 * Deflate length jschars into a newly allocated, NUL-terminated byte string.
 *
 * May be called with null cx by js_GetStringBytes, see below; in that case
 * the buffer comes from malloc instead of JS_malloc. Returns NULL if the
 * conversion fails or if the allocation fails.
 */
char *
js_DeflateString(JSContext *cx, const jschar *chars, size_t length)
{
    size_t nbytes = 0;
    char *out = NULL;

    /* Sizing pass: a NULL destination yields the required byte count. */
    if (!js_DeflateStringToBuffer(cx, chars, length, NULL, &nbytes))
        return NULL;

    if (cx)
        out = (char *) JS_malloc(cx, nbytes + 1);
    else
        out = (char *) malloc(nbytes + 1);

    if (out) {
        /* Conversion pass into the exactly-sized buffer, then terminate. */
        js_DeflateStringToBuffer(cx, chars, length, out, &nbytes);
        out[nbytes] = 0;
    }
    return out;
}
void
js_DeflateStringToBuffer(char *bytes, const jschar *chars, size_t length)
/*
 * Encode srclen jschars from src as UTF-8 into dst.
 *
 * On entry *dstlenp is the capacity of dst in bytes; on return it is the
 * number of bytes produced so far (also on failure). If dst is NULL nothing
 * is stored, the capacity is treated as unlimited, and *dstlenp receives the
 * number of bytes the encoding needs. No terminating 0 byte is written.
 *
 * Returns JS_FALSE on a bad surrogate (a lone low surrogate, or a high
 * surrogate not followed by a low one) and when the output does not fit.
 * Errors are reported through cx unless cx is NULL.
 */
JSBool
js_DeflateStringToBuffer(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
{
    size_t i, utf8Len, dstlen = *dstlenp, origDstlen = dstlen;
    jschar c, c2;
    uint32 v;
    uint8 utf8buf[6];

    /* Sizing mode: pretend the destination is unbounded. */
    if (!dst)
        dstlen = origDstlen = (size_t) -1;

    while (srclen) {
        c = *src++;
        srclen--;
        if ((c >= 0xDC00) && (c <= 0xDFFF))
            goto badSurrogate;
        if (c < 0xD800 || c > 0xDBFF) {
            v = c;
        } else {
            /*
             * c is a high surrogate and srclen no longer counts it, so a
             * single remaining jschar is enough to complete the pair.
             * NOTE(review): input that ends on a high surrogate is still
             * reported as "buffer too small", as before.
             */
            if (srclen < 1)
                goto bufferTooSmall;
            c2 = *src++;
            srclen--;
            if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
                c = c2;
                goto badSurrogate;
            }
            v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
        }
        if (v < 0x0080) {
            /* no encoding necessary - performance hack */
            if (!dstlen)
                goto bufferTooSmall;
            if (dst)
                *dst++ = (char) v;
            utf8Len = 1;
        } else {
            utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
            if (utf8Len > dstlen)
                goto bufferTooSmall;
            if (dst) {
                for (i = 0; i < utf8Len; i++)
                    *dst++ = (char) utf8buf[i];
            }
        }
        dstlen -= utf8Len;
    }
    *dstlenp = (origDstlen - dstlen);
    return JS_TRUE;

badSurrogate:
    *dstlenp = (origDstlen - dstlen);
    if (cx) {
        char buffer[10];
        JS_snprintf(buffer, 10, "0x%x", c);
        JS_ReportErrorFlagsAndNumber(cx,
                                     JSREPORT_ERROR,
                                     js_GetErrorMessage, NULL,
                                     JSMSG_BAD_SURROGATE_CHAR,
                                     buffer);
    }
    return JS_FALSE;

bufferTooSmall:
    *dstlenp = (origDstlen - dstlen);
    if (cx)
        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
    return JS_FALSE;
}
/*
 * Decode srclen bytes of UTF-8 from src into jschars at dst.
 *
 * On entry *dstlenp is the capacity of dst in jschars; on return it is the
 * number of jschars produced so far (also on failure). If dst is NULL nothing
 * is stored, the capacity is treated as unlimited, and *dstlenp receives the
 * number of jschars the decoding needs. Code points at or above 0x10000 are
 * emitted as a surrogate pair. No terminating 0 jschar is written.
 *
 * Returns JS_FALSE for a malformed sequence, for a code point too large to
 * be expressed as a surrogate pair, and when the output does not fit or the
 * input ends in the middle of a sequence. Errors are reported through cx
 * unless cx is NULL.
 */
JSBool
js_InflateStringToBuffer(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenp)
{
    uint32 v;
    size_t offset = 0, j, n, dstlen = *dstlenp, origDstlen = dstlen;

    /* Sizing mode: pretend the destination is unbounded. */
    if (!dst)
        dstlen = origDstlen = (size_t) -1;

    while (srclen) {
        v = (uint8) *src;
        n = 1;
        if (v & 0x80) {
            /* Count the leading 1 bits of the lead byte: the sequence length. */
            while (v & (0x80 >> n))
                n++;
            /*
             * Reject an invalid lead byte before looking at the remaining
             * length, so it is never misreported as a short buffer.
             */
            if (n == 1 || n > 6)
                goto badCharacter;
            if (n > srclen)
                goto bufferTooSmall;
            for (j = 1; j < n; j++) {
                if ((src[j] & 0xC0) != 0x80)
                    goto badCharacter;
            }
            v = Utf8ToOneUcs4Char((const uint8 *) src, n);
            if (v >= 0x10000) {
                v -= 0x10000;
                if (v > 0xFFFFF)
                    goto charTooLarge;
                /* A surrogate pair needs two slots; this is a size problem. */
                if (dstlen < 2)
                    goto bufferTooSmall;
                if (dst) {
                    *dst++ = (jschar)((v >> 10) + 0xD800);
                    v = (jschar)((v & 0x3FF) + 0xDC00);
                }
                dstlen--;
            }
        }
        if (!dstlen)
            goto bufferTooSmall;
        if (dst)
            *dst++ = (jschar) v;
        dstlen--;
        offset += n;
        src += n;
        srclen -= n;
    }
    *dstlenp = (origDstlen - dstlen);
    return JS_TRUE;

charTooLarge:
    *dstlenp = (origDstlen - dstlen);
    if (cx) {
        /* Room for "0x" plus eight hex digits plus the terminator. */
        char buffer[16];
        JS_snprintf(buffer, sizeof buffer, "0x%x", (unsigned int) (v + 0x10000));
        JS_ReportErrorFlagsAndNumber(cx,
                                     JSREPORT_ERROR,
                                     js_GetErrorMessage, NULL,
                                     JSMSG_UTF8_CHAR_TOO_LARGE,
                                     buffer);
    }
    return JS_FALSE;

badCharacter:
    *dstlenp = (origDstlen - dstlen);
    if (cx) {
        char buffer[16];
        /* offset is a size_t; convert explicitly to match the %d format. */
        JS_snprintf(buffer, sizeof buffer, "%d", (int) offset);
        JS_ReportErrorFlagsAndNumber(cx,
                                     JSREPORT_ERROR,
                                     js_GetErrorMessage, NULL,
                                     JSMSG_MALFORMED_UTF8_CHAR,
                                     buffer);
    }
    return JS_FALSE;

bufferTooSmall:
    *dstlenp = (origDstlen - dstlen);
    if (cx)
        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
    return JS_FALSE;
}
#else
/*
 * Non-UTF-8 build: widen bytes to jschars, one jschar per byte.
 *
 * On entry *charsLength is the capacity of chars. If length exceeds it, the
 * first *charsLength bytes are still copied, an error is reported through cx
 * (unless cx is NULL), *charsLength is left unchanged and JS_FALSE is
 * returned. Otherwise all length bytes are copied, *charsLength is set to
 * length and JS_TRUE is returned. No terminating 0 jschar is written.
 */
JSBool
js_InflateStringToBuffer(JSContext* cx, const char *bytes, size_t length, jschar *chars, size_t* charsLength)
{
size_t i;
/* NOTE(review): this macro line looks like a leftover from the pre-patch body of a deflate routine; confirm whether it belongs here. */
DEFLATE_STRING_BODY
if (length > *charsLength) {
/* Destination too small: copy what fits, then fail. */
for (i = 0; i < *charsLength; i++)
chars[i] = (unsigned char) bytes[i];
if (cx)
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
return JS_FALSE;
}
else {
/* Everything fits: the cast through unsigned char keeps bytes >= 0x80 from sign-extending. */
for (i = 0; i < length; i++)
chars[i] = (unsigned char) bytes[i];
*charsLength = length;
return JS_TRUE;
}
}
/*
 * Non-UTF-8 build: inflate *bytesLength bytes into a newly JS_malloc'ed,
 * 0-terminated jschar vector, one jschar per byte.
 *
 * On success *bytesLength holds the resulting length in jschars (the same as
 * the input length). If the allocation fails, *bytesLength is set to 0 and
 * NULL is returned.
 */
jschar *
js_InflateString(JSContext *cx, const char *bytes, size_t *bytesLength)
{
    const size_t nbytes = *bytesLength;
    const unsigned char *in = (const unsigned char *) bytes;
    jschar *result, *out;
    size_t remaining;

    result = (jschar *) JS_malloc(cx, (nbytes + 1) * sizeof(jschar));
    if (result == NULL) {
        *bytesLength = 0;
        return NULL;
    }

    /* Reading through unsigned char keeps bytes >= 0x80 from sign-extending. */
    out = result;
    for (remaining = nbytes; remaining != 0; remaining--)
        *out++ = *in++;
    *out = 0;

    *bytesLength = nbytes;
    return result;
}
/*
 * Non-UTF-8 build: narrow jschars to bytes, one byte per jschar (the high
 * byte of each jschar is discarded).
 *
 * On entry *bytesLength is the capacity of bytes. If bytes is NULL nothing
 * is stored and *bytesLength receives the number of bytes required, matching
 * the sizing query supported by the UTF-8 build. If length exceeds the
 * capacity, the first *bytesLength jschars are still converted, an error is
 * reported through cx (unless cx is NULL) and JS_FALSE is returned.
 * Otherwise all length jschars are converted, *bytesLength is set to length
 * and JS_TRUE is returned. No terminating 0 byte is written.
 */
JSBool
js_DeflateStringToBuffer(JSContext* cx, const jschar *chars, size_t length, char *bytes, size_t* bytesLength)
{
    size_t i;

    /* Sizing query: report the needed size instead of writing through NULL. */
    if (!bytes) {
        *bytesLength = length;
        return JS_TRUE;
    }

    if (length > *bytesLength) {
        /* Destination too small: convert what fits, then fail. */
        for (i = 0; i < *bytesLength; i++)
            bytes[i] = (char) chars[i];
        if (cx)
            JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
        return JS_FALSE;
    }

    for (i = 0; i < length; i++)
        bytes[i] = (char) chars[i];
    *bytesLength = length;
    return JS_TRUE;
}
/*
@ -2934,10 +3138,15 @@ js_DeflateString(JSContext *cx, const jschar *chars, size_t length)
if (!bytes)
return NULL;
DEFLATE_STRING_BODY
for (i = 0; i < length; i++)
bytes[i] = (char) chars[i];
bytes [length] = 0;
return bytes;
}
#endif
static JSHashTable *
GetDeflatedStringCache(void)
{
@ -4357,8 +4566,8 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
const jschar *unescapedSet2, jsval *rval)
{
size_t length, j, k, L;
jschar *chars, C, C2;
uint32 V;
jschar *chars, c, c2;
uint32 v;
uint8 utf8buf[6];
jschar hexBuf[4];
static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
@ -4378,19 +4587,19 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
hexBuf[3] = 0;
chars = JSSTRING_CHARS(str);
for (k = 0; k < length; k++) {
C = chars[k];
if (js_strchr(unescapedSet, C) ||
(unescapedSet2 && js_strchr(unescapedSet2, C))) {
if (!AddCharsToURI(cx, R, &C, 1))
c = chars[k];
if (js_strchr(unescapedSet, c) ||
(unescapedSet2 && js_strchr(unescapedSet2, c))) {
if (!AddCharsToURI(cx, R, &c, 1))
return JS_FALSE;
} else {
if ((C >= 0xDC00) && (C <= 0xDFFF)) {
if ((c >= 0xDC00) && (c <= 0xDFFF)) {
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
JSMSG_BAD_URI, NULL);
return JS_FALSE;
}
if (C < 0xD800 || C > 0xDBFF) {
V = C;
if (c < 0xD800 || c > 0xDBFF) {
v = c;
} else {
k++;
if (k == length) {
@ -4398,15 +4607,15 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
JSMSG_BAD_URI, NULL);
return JS_FALSE;
}
C2 = chars[k];
if ((C2 < 0xDC00) || (C2 > 0xDFFF)) {
c2 = chars[k];
if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
JSMSG_BAD_URI, NULL);
return JS_FALSE;
}
V = ((C - 0xD800) << 10) + (C2 - 0xDC00) + 0x10000;
v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
}
L = OneUcs4ToUtf8Char(utf8buf, V);
L = js_OneUcs4ToUtf8Char(utf8buf, v);
for (j = 0; j < L; j++) {
hexBuf[1] = HexDigits[utf8buf[j] >> 4];
hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
@ -4432,8 +4641,8 @@ static JSBool
Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
{
size_t length, start, k;
jschar *chars, C, H;
uint32 V;
jschar *chars, c, H;
uint32 v;
jsuint B;
uint8 octets[6];
JSString *R;
@ -4451,8 +4660,8 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
chars = JSSTRING_CHARS(str);
for (k = 0; k < length; k++) {
C = chars[k];
if (C == '%') {
c = chars[k];
if (c == '%') {
start = k;
if ((k + 2) >= length)
goto bad;
@ -4461,7 +4670,7 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
k += 2;
if (!(B & 0x80)) {
C = (jschar)B;
c = (jschar)B;
} else {
n = 1;
while (B & (0x80 >> n))
@ -4483,28 +4692,28 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
k += 2;
octets[j] = (char)B;
}
V = Utf8ToOneUcs4Char(octets, n);
if (V >= 0x10000) {
V -= 0x10000;
if (V > 0xFFFFF)
v = Utf8ToOneUcs4Char(octets, n);
if (v >= 0x10000) {
v -= 0x10000;
if (v > 0xFFFFF)
goto bad;
C = (jschar)((V & 0x3FF) + 0xDC00);
H = (jschar)((V >> 10) + 0xD800);
c = (jschar)((v & 0x3FF) + 0xDC00);
H = (jschar)((v >> 10) + 0xD800);
if (!AddCharsToURI(cx, R, &H, 1))
return JS_FALSE;
} else {
C = (jschar)V;
c = (jschar)v;
}
}
if (js_strchr(reservedSet, C)) {
if (js_strchr(reservedSet, c)) {
if (!AddCharsToURI(cx, R, &chars[start], (k - start + 1)))
return JS_FALSE;
} else {
if (!AddCharsToURI(cx, R, &C, 1))
if (!AddCharsToURI(cx, R, &c, 1))
return JS_FALSE;
}
} else {
if (!AddCharsToURI(cx, R, &C, 1))
if (!AddCharsToURI(cx, R, &c, 1))
return JS_FALSE;
}
}
@ -4582,8 +4791,8 @@ str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
*/
static int
OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char)
int
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char)
{
int utf8Length = 1;

Просмотреть файл

@ -424,27 +424,33 @@ js_SkipWhiteSpace(const jschar *s);
/*
* Inflate bytes to JS chars and vice versa. Report out of memory via cx
* and return null on error, otherwise return the jschar or byte vector that
* was JS_malloc'ed.
* was JS_malloc'ed. length is updated with the length of the new string in jschars.
*/
extern jschar *
js_InflateString(JSContext *cx, const char *bytes, size_t length);
js_InflateString(JSContext *cx, const char *bytes, size_t *length);
extern char *
js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
/*
* Inflate bytes to JS chars into a buffer.
* 'chars' must be large enough for 'length'+1 jschars.
* 'chars' must be large enough for 'length' jschars.
* The buffer is NOT null-terminated.
* cx may be NULL, which means no errors are thrown.
* The destination length needs to be initialized with the buffer size, takes the number of chars moved.
*/
extern void
js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
extern JSBool
js_InflateStringToBuffer(JSContext* cx, const char *bytes, size_t length, jschar *chars, size_t* charsLength);
/*
* Deflate JS chars to bytes into a buffer.
* 'bytes' must be large enough for 'length'+1 chars.
* 'bytes' must be large enough for 'length' chars.
* The buffer is NOT null-terminated.
* cx may be NULL, which means no errors are thrown.
* The destination length needs to be initialized with the buffer size, takes the number of bytes moved.
*/
extern void
js_DeflateStringToBuffer(char *bytes, const jschar *chars, size_t length);
extern JSBool
js_DeflateStringToBuffer(JSContext* cx, const jschar *chars, size_t charsLength, char *bytes, size_t* length);
/*
* Associate bytes with str in the deflated string cache, returning true on
@ -464,6 +470,13 @@ JSBool
js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
jsval *rval);
/*
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
*/
extern int
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
JS_END_EXTERN_C
#endif /* jsstr_h___ */

Просмотреть файл

@ -1885,7 +1885,7 @@ ParseXMLSource(JSContext *cx, JSString *src)
{
jsval nsval;
JSXMLNamespace *ns;
size_t urilen, srclen, length, offset;
size_t urilen, srclen, length, offset, dstlen;
jschar *chars;
const jschar *srcp, *endp;
void *mark;
@ -1917,16 +1917,20 @@ ParseXMLSource(JSContext *cx, JSString *src)
if (!chars)
return NULL;
js_InflateStringToBuffer(chars, prefix, constrlen(prefix));
offset = constrlen(prefix);
dstlen = length;
js_InflateStringToBuffer(cx, prefix, constrlen(prefix), chars, &dstlen);
offset = dstlen;
js_strncpy(chars + offset, JSSTRING_CHARS(ns->uri), urilen);
offset += urilen;
js_InflateStringToBuffer(chars + offset, middle, constrlen(middle));
offset += constrlen(middle);
dstlen = length - offset + 1;
js_InflateStringToBuffer(cx, middle, constrlen(middle), chars + offset, &dstlen);
offset += dstlen;
srcp = JSSTRING_CHARS(src);
js_strncpy(chars + offset, srcp, srclen);
offset += srclen;
js_InflateStringToBuffer(chars + offset, suffix, constrlen(suffix));
dstlen = length - offset + 1;
js_InflateStringToBuffer(cx, suffix, constrlen(suffix), chars + offset, &dstlen);
chars [offset + dstlen] = 0;
mark = JS_ARENA_MARK(&cx->tempPool);
ts = js_NewBufferTokenStream(cx, chars, length);