зеркало из https://github.com/mozilla/gecko-dev.git
bug 232182: Allow SpiderMonkey to deal with UTF-8 strings (controlled by a define). Patch from Michael Daumling <daumling@adobe.com>. r=shaver
This commit is contained in:
Родитель
d576bf2833
Коммит
809abae6c7
52
js/src/js.c
52
js/src/js.c
|
@ -1564,6 +1564,48 @@ ToInt32(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|||
return JS_NewNumberValue(cx, i, rval);
|
||||
}
|
||||
|
||||
static JSBool
|
||||
StringsAreUtf8(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
*rval = JS_StringsAreUTF8 () ? JSVAL_TRUE : JSVAL_FALSE;
|
||||
return JS_TRUE;
|
||||
}
|
||||
|
||||
static const char* badUtf8 = "...\xC0...";
|
||||
static const char* bigUtf8 = "...\xFB\xBF\xBF\xBF\xBF...";
|
||||
static const jschar badSurrogate[] = { 'A', 'B', 'C', 0xDEEE, 'D', 'E', 0 };
|
||||
|
||||
static JSBool
|
||||
TestUtf8(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
intN mode = 1;
|
||||
jschar chars[20];
|
||||
size_t charsLength = 5;
|
||||
char bytes[20];
|
||||
size_t bytesLength = 20;
|
||||
if (argc)
|
||||
JS_ValueToInt32 (cx, *argv, &mode);
|
||||
switch (mode) {
|
||||
// mode 1: malformed UTF-8 string (throws error if compiled with UTF-8)
|
||||
case 1:
|
||||
JS_NewStringCopyZ (cx, badUtf8);
|
||||
break;
|
||||
// mode 2: big UTF-8 character (throws error if compiled with UTF-8)
|
||||
case 2:
|
||||
JS_NewStringCopyZ (cx, bigUtf8);
|
||||
break;
|
||||
// mode 3: bad Unicode surrogate character (throws error if compiled with UTF-8)
|
||||
case 3:
|
||||
JS_EncodeCharacters (cx, badSurrogate, 6, bytes, &bytesLength);
|
||||
break;
|
||||
// mode 4: use a too small buffer
|
||||
case 4:
|
||||
JS_DecodeBytes(cx, "1234567890", 10, chars, &charsLength);
|
||||
break;
|
||||
}
|
||||
return !JS_IsExceptionPending (cx);
|
||||
}
|
||||
|
||||
static JSFunctionSpec shell_functions[] = {
|
||||
{"version", Version, 0},
|
||||
{"options", Options, 0},
|
||||
|
@ -1577,6 +1619,8 @@ static JSFunctionSpec shell_functions[] = {
|
|||
{"untrap", Untrap, 2},
|
||||
{"line2pc", LineToPC, 0},
|
||||
{"pc2line", PCToLine, 0},
|
||||
{"stringsAreUtf8", StringsAreUtf8, 0},
|
||||
{"testUtf8", TestUtf8, 1},
|
||||
#ifdef DEBUG
|
||||
{"dis", Disassemble, 1},
|
||||
{"dissrc", DisassWithSrc, 1},
|
||||
|
@ -1616,6 +1660,8 @@ static char *shell_help_messages[] = {
|
|||
"untrap(fun[, pc]) Remove a trap",
|
||||
"line2pc([fun,] line) Map line number to PC",
|
||||
"pc2line(fun[, pc]) Map PC to line number",
|
||||
"stringsAreUTF8() Check if strings are UTF-8 encoded",
|
||||
"testUTF8(mode) Perform UTF-8 tests (modes are 1 to 4)",
|
||||
#ifdef DEBUG
|
||||
"dis([fun]) Disassemble functions into bytecodes",
|
||||
"dissrc([fun]) Disassemble functions with source lines",
|
||||
|
@ -1643,14 +1689,14 @@ static char *shell_help_messages[] = {
|
|||
static void
|
||||
ShowHelpHeader(void)
|
||||
{
|
||||
fprintf(gOutFile, "%-9s %-22s %s\n", "Command", "Usage", "Description");
|
||||
fprintf(gOutFile, "%-9s %-22s %s\n", "=======", "=====", "===========");
|
||||
fprintf(gOutFile, "%-14s %-22s %s\n", "Command", "Usage", "Description");
|
||||
fprintf(gOutFile, "%-14s %-22s %s\n", "=======", "=====", "===========");
|
||||
}
|
||||
|
||||
static void
|
||||
ShowHelpForCommand(uintN n)
|
||||
{
|
||||
fprintf(gOutFile, "%-9.9s %s\n", shell_functions[n].name, shell_help_messages[n]);
|
||||
fprintf(gOutFile, "%-14.14s %s\n", shell_functions[n].name, shell_help_messages[n]);
|
||||
}
|
||||
|
||||
static JSBool
|
||||
|
|
|
@ -281,3 +281,7 @@ MSG_DEF(JSMSG_DUPLICATE_XML_ATTR, 198, 1, JSEXN_TYPEERR, "duplicate XML attr
|
|||
MSG_DEF(JSMSG_TOO_MANY_FUN_VARS, 199, 0, JSEXN_SYNTAXERR, "too many local variables")
|
||||
MSG_DEF(JSMSG_ARRAY_INIT_TOO_BIG, 200, 0, JSEXN_INTERNALERR, "array initialiser too large")
|
||||
MSG_DEF(JSMSG_REGEXP_TOO_COMPLEX, 201, 0, JSEXN_INTERNALERR, "regular expression too complex")
|
||||
MSG_DEF(JSMSG_BUFFER_TOO_SMALL, 202, 0, JSEXN_INTERNALERR, "buffer too small")
|
||||
MSG_DEF(JSMSG_BAD_SURROGATE_CHAR, 203, 1, JSEXN_TYPEERR, "bad surrogate character {0}")
|
||||
MSG_DEF(JSMSG_UTF8_CHAR_TOO_LARGE, 204, 1, JSEXN_TYPEERR, "UTF-8 character {0} too large")
|
||||
MSG_DEF(JSMSG_MALFORMED_UTF8_CHAR, 205, 1, JSEXN_TYPEERR, "malformed UTF-8 character sequence at offset {0}")
|
||||
|
|
|
@ -3606,7 +3606,7 @@ JS_CompileScript(JSContext *cx, JSObject *obj,
|
|||
JSScript *script;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
script = JS_CompileUCScript(cx, obj, chars, length, filename, lineno);
|
||||
|
@ -3624,7 +3624,7 @@ JS_CompileScriptForPrincipals(JSContext *cx, JSObject *obj,
|
|||
JSScript *script;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
script = JS_CompileUCScriptForPrincipals(cx, obj, principals,
|
||||
|
@ -3693,7 +3693,7 @@ JS_BufferIsCompilableUnit(JSContext *cx, JSObject *obj,
|
|||
JSErrorReporter older;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return JS_TRUE;
|
||||
|
||||
|
@ -3816,7 +3816,7 @@ JS_CompileFunction(JSContext *cx, JSObject *obj, const char *name,
|
|||
JSFunction *fun;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
fun = JS_CompileUCFunction(cx, obj, name, nargs, argnames, chars, length,
|
||||
|
@ -3836,7 +3836,7 @@ JS_CompileFunctionForPrincipals(JSContext *cx, JSObject *obj,
|
|||
JSFunction *fun;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
fun = JS_CompileUCFunctionForPrincipals(cx, obj, principals, name,
|
||||
|
@ -4041,7 +4041,7 @@ JS_EvaluateScript(JSContext *cx, JSObject *obj,
|
|||
JSBool ok;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return JS_FALSE;
|
||||
ok = JS_EvaluateUCScript(cx, obj, chars, length, filename, lineno, rval);
|
||||
|
@ -4060,7 +4060,7 @@ JS_EvaluateScriptForPrincipals(JSContext *cx, JSObject *obj,
|
|||
JSBool ok;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return JS_FALSE;
|
||||
ok = JS_EvaluateUCScriptForPrincipals(cx, obj, principals, chars, length,
|
||||
|
@ -4210,15 +4210,16 @@ JS_NewString(JSContext *cx, char *bytes, size_t length)
|
|||
{
|
||||
jschar *chars;
|
||||
JSString *str;
|
||||
size_t charsLength = length;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
/* Make a Unicode vector from the 8-bit char codes in bytes. */
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &charsLength);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
|
||||
/* Free chars (but not bytes, which caller frees on error) if we fail. */
|
||||
str = js_NewString(cx, chars, length, 0);
|
||||
str = js_NewString(cx, chars, charsLength, 0);
|
||||
if (!str) {
|
||||
JS_free(cx, chars);
|
||||
return NULL;
|
||||
|
@ -4237,7 +4238,7 @@ JS_NewStringCopyN(JSContext *cx, const char *s, size_t n)
|
|||
JSString *str;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
js = js_InflateString(cx, s, n);
|
||||
js = js_InflateString(cx, s, &n);
|
||||
if (!js)
|
||||
return NULL;
|
||||
str = js_NewString(cx, js, n, 0);
|
||||
|
@ -4257,7 +4258,7 @@ JS_NewStringCopyZ(JSContext *cx, const char *s)
|
|||
if (!s)
|
||||
return cx->runtime->emptyString;
|
||||
n = strlen(s);
|
||||
js = js_InflateString(cx, s, n);
|
||||
js = js_InflateString(cx, s, &n);
|
||||
if (!js)
|
||||
return NULL;
|
||||
str = js_NewString(cx, js, n, 0);
|
||||
|
@ -4401,6 +4402,28 @@ JS_MakeStringImmutable(JSContext *cx, JSString *str)
|
|||
return JS_TRUE;
|
||||
}
|
||||
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_EncodeCharacters(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenP)
|
||||
{
|
||||
return js_DeflateStringToBuffer (cx, src, srclen, dst, dstlenP);
|
||||
}
|
||||
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_DecodeBytes(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenP)
|
||||
{
|
||||
return js_InflateStringToBuffer (cx, src, srclen, dst, dstlenP);
|
||||
}
|
||||
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_StringsAreUTF8 ()
|
||||
{
|
||||
#ifdef JS_STRINGS_ARE_UTF8
|
||||
return JS_TRUE;
|
||||
#else
|
||||
return JS_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
||||
JS_PUBLIC_API(void)
|
||||
|
@ -4508,7 +4531,7 @@ JS_NewRegExpObject(JSContext *cx, char *bytes, size_t length, uintN flags)
|
|||
JSObject *obj;
|
||||
|
||||
CHECK_REQUEST(cx);
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
obj = js_NewRegExpObject(cx, NULL, chars, length, flags);
|
||||
|
|
|
@ -1812,6 +1812,36 @@ JS_UndependString(JSContext *cx, JSString *str);
|
|||
extern JS_PUBLIC_API(JSBool)
|
||||
JS_MakeStringImmutable(JSContext *cx, JSString *str);
|
||||
|
||||
/*
|
||||
* Return JS_TRUE if all strings are UTF-8. The source must be compiled
|
||||
* with JS_STRINGS_ARE_UTF8 defined to get UTF-8 support.
|
||||
*/
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_StringsAreUTF8();
|
||||
|
||||
/*
|
||||
* Character encoding support
|
||||
* For both JS_EncodeCharacters and JS_DecodeBytes, set *dstlenp to the size
|
||||
* of the destination buffer before the call; on return, it contains the number of
|
||||
* bytes (JS_EncodeCharacters) or jschars (JS_DecodeBytes) actually stored.
|
||||
* To determine the necessary destination buffer size, set the destination buffer
|
||||
* pointer to NULL. On errors, the functions report an error. In that case,
|
||||
* *dstlenp contains the number of characters or bytes transferred so far.
|
||||
* If cx is NULL, no error is reported on a failure, and the functions just
|
||||
* return JS_FALSE. Both functions do NOT store an additional 0-byte
|
||||
* (or 0-jschar) behind the string.
|
||||
* If the source has been compiled with the #define JS_STRINGS_ARE_UTF8 to
|
||||
* enable UTF-8 support, JS_EncodeCharacters() encodes to UTF-8, and
|
||||
* JS_DecodeBytes() decodes from UTF-8, which may create additional
|
||||
* errors if the character sequence is malformed. If UTF-8 support is
|
||||
* disabled, the functions expand bytes to jschars and vice versa.
|
||||
*/
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_EncodeCharacters(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenp);
|
||||
|
||||
JS_PUBLIC_API(JSBool)
|
||||
JS_DecodeBytes(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenp);
|
||||
|
||||
/************************************************************************/
|
||||
|
||||
/*
|
||||
|
|
|
@ -719,12 +719,15 @@ js_Atomize(JSContext *cx, const char *bytes, size_t length, uintN flags)
|
|||
*/
|
||||
#define ATOMIZE_BUF_MAX 32
|
||||
jschar inflated[ATOMIZE_BUF_MAX];
|
||||
size_t inflatedLength = ATOMIZE_BUF_MAX - 1;
|
||||
|
||||
if (length < ATOMIZE_BUF_MAX) {
|
||||
js_InflateStringToBuffer(inflated, bytes, length);
|
||||
js_InflateStringToBuffer(cx, bytes, length, inflated, &inflatedLength);
|
||||
inflated[inflatedLength] = 0;
|
||||
chars = inflated;
|
||||
} else {
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
inflatedLength = length;
|
||||
chars = js_InflateString(cx, bytes, &inflatedLength);
|
||||
if (!chars)
|
||||
return NULL;
|
||||
flags |= ATOM_NOCOPY;
|
||||
|
@ -733,7 +736,7 @@ js_Atomize(JSContext *cx, const char *bytes, size_t length, uintN flags)
|
|||
str = ALIGN(buf, JSString);
|
||||
|
||||
str->chars = chars;
|
||||
str->length = length;
|
||||
str->length = inflatedLength;
|
||||
atom = js_AtomizeString(cx, str, ATOM_TMPSTR | flags);
|
||||
if (chars != inflated && (!atom || ATOM_TO_STRING(atom)->chars != chars))
|
||||
JS_free(cx, chars);
|
||||
|
|
|
@ -843,8 +843,9 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
|
|||
for (i = 0; i < argCount; i++) {
|
||||
if (charArgs) {
|
||||
char *charArg = va_arg(ap, char *);
|
||||
size_t charArgLength = strlen(charArg);
|
||||
reportp->messageArgs[i]
|
||||
= js_InflateString(cx, charArg, strlen(charArg));
|
||||
= js_InflateString(cx, charArg, &charArgLength);
|
||||
if (!reportp->messageArgs[i])
|
||||
goto error;
|
||||
}
|
||||
|
@ -862,12 +863,16 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
|
|||
*/
|
||||
if (argCount > 0) {
|
||||
if (efs->format) {
|
||||
const char *fmt;
|
||||
jschar *buffer, *fmt, *out;
|
||||
const jschar *arg;
|
||||
jschar *out;
|
||||
int expandedArgs = 0;
|
||||
size_t expandedLength
|
||||
= strlen(efs->format)
|
||||
size_t expandedLength;
|
||||
size_t len = strlen (efs->format);
|
||||
buffer = fmt = js_InflateString (cx, efs->format, &len);
|
||||
if (!buffer)
|
||||
goto error;
|
||||
expandedLength
|
||||
= len
|
||||
- (3 * argCount) /* exclude the {n} */
|
||||
+ totalArgsLength;
|
||||
/*
|
||||
|
@ -876,9 +881,10 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
|
|||
*/
|
||||
reportp->ucmessage = out = (jschar *)
|
||||
JS_malloc(cx, (expandedLength + 1) * sizeof(jschar));
|
||||
if (!out)
|
||||
if (!out) {
|
||||
JS_free (cx, buffer);
|
||||
goto error;
|
||||
fmt = efs->format;
|
||||
}
|
||||
while (*fmt) {
|
||||
if (*fmt == '{') {
|
||||
if (isdigit(fmt[1])) {
|
||||
|
@ -892,13 +898,11 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
|
|||
continue;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* is this kosher?
|
||||
*/
|
||||
*out++ = (unsigned char)(*fmt++);
|
||||
*out++ = *fmt++;
|
||||
}
|
||||
JS_ASSERT(expandedArgs == argCount);
|
||||
*out = 0;
|
||||
JS_free (cx, buffer);
|
||||
*messagep =
|
||||
js_DeflateString(cx, reportp->ucmessage,
|
||||
(size_t)(out - reportp->ucmessage));
|
||||
|
@ -911,11 +915,13 @@ js_ExpandErrorArguments(JSContext *cx, JSErrorCallback callback,
|
|||
* entire message.
|
||||
*/
|
||||
if (efs->format) {
|
||||
size_t len;
|
||||
*messagep = JS_strdup(cx, efs->format);
|
||||
if (!*messagep)
|
||||
goto error;
|
||||
len = strlen(*messagep);
|
||||
reportp->ucmessage
|
||||
= js_InflateString(cx, *messagep, strlen(*messagep));
|
||||
= js_InflateString(cx, *messagep, &len);
|
||||
if (!reportp->ucmessage)
|
||||
goto error;
|
||||
}
|
||||
|
|
|
@ -940,7 +940,7 @@ JS_EvaluateInStackFrame(JSContext *cx, JSStackFrame *fp,
|
|||
jschar *chars;
|
||||
JSBool ok;
|
||||
|
||||
chars = js_InflateString(cx, bytes, length);
|
||||
chars = js_InflateString(cx, bytes, &length);
|
||||
if (!chars)
|
||||
return JS_FALSE;
|
||||
ok = JS_EvaluateUCInStackFrame(cx, fp, chars, length, filename, lineno,
|
||||
|
|
|
@ -568,7 +568,7 @@ js_EnterSharpObject(JSContext *cx, JSObject *obj, JSIdArray **idap,
|
|||
len = JS_snprintf(buf, sizeof buf, "#%u%c",
|
||||
sharpid >> SHARP_ID_SHIFT,
|
||||
(sharpid & SHARP_BIT) ? '#' : '=');
|
||||
*sp = js_InflateString(cx, buf, len);
|
||||
*sp = js_InflateString(cx, buf, &len);
|
||||
if (!*sp) {
|
||||
if (ida)
|
||||
JS_DestroyIdArray(cx, ida);
|
||||
|
|
|
@ -422,8 +422,15 @@ QuoteString(Sprinter *sp, JSString *str, jschar quote)
|
|||
/* Use js_EscapeMap, \u, or \x only if necessary. */
|
||||
if ((u = js_strchr(js_EscapeMap, c)) != NULL)
|
||||
ok = Sprint(sp, "\\%c", (char)u[1]) >= 0;
|
||||
else
|
||||
else {
|
||||
#ifdef JS_STRINGS_ARE_UTF8
|
||||
/* print as UTF-8 string */
|
||||
ok = Sprint(sp, "%hc", c) >= 0;
|
||||
#else
|
||||
/* Use \uxxxx or \xXX if the string cannot be displayed as UTF-8 */
|
||||
ok = Sprint(sp, (c >> 8) ? "\\u%04X" : "\\x%02X", c) >= 0;
|
||||
#endif
|
||||
}
|
||||
if (!ok)
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -1336,9 +1336,11 @@ retry:
|
|||
kw = ATOM_KEYWORD(atom);
|
||||
if (kw->tokentype == TOK_RESERVED) {
|
||||
char buf[MAX_KEYWORD_LENGTH + 1];
|
||||
|
||||
js_DeflateStringToBuffer(buf, TOKENBUF_BASE(),
|
||||
TOKENBUF_LENGTH());
|
||||
size_t buflen = sizeof(buf) - 1;
|
||||
if (!js_DeflateStringToBuffer(cx, TOKENBUF_BASE(), TOKENBUF_LENGTH(),
|
||||
buf, &buflen))
|
||||
goto error;
|
||||
buf [buflen] = 0;
|
||||
if (!js_ReportCompileErrorNumber(cx, ts,
|
||||
JSREPORT_TS |
|
||||
JSREPORT_WARNING |
|
||||
|
|
327
js/src/jsstr.c
327
js/src/jsstr.c
|
@ -267,9 +267,6 @@ static JSBool
|
|||
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
||||
jsval *rval);
|
||||
|
||||
static int
|
||||
OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
|
||||
|
||||
static uint32
|
||||
Utf8ToOneUcs4Char(const uint8 *utf8Buffer, int utf8Length);
|
||||
|
||||
|
@ -2880,44 +2877,251 @@ js_SkipWhiteSpace(const jschar *s)
|
|||
return s;
|
||||
}
|
||||
|
||||
#define INFLATE_STRING_BODY \
|
||||
for (i = 0; i < length; i++) \
|
||||
chars[i] = (unsigned char) bytes[i]; \
|
||||
chars[i] = 0;
|
||||
|
||||
void
|
||||
js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
INFLATE_STRING_BODY
|
||||
}
|
||||
#ifdef JS_STRINGS_ARE_UTF8
|
||||
|
||||
jschar *
|
||||
js_InflateString(JSContext *cx, const char *bytes, size_t length)
|
||||
js_InflateString(JSContext *cx, const char *bytes, size_t *length)
|
||||
{
|
||||
jschar *chars;
|
||||
size_t i;
|
||||
jschar *chars = NULL;
|
||||
size_t dstlen = 0;
|
||||
|
||||
chars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
|
||||
if (!js_InflateStringToBuffer(cx, bytes, *length, NULL, &dstlen))
|
||||
return NULL;
|
||||
chars = (jschar *) JS_malloc(cx, (dstlen + 1) * sizeof (jschar));
|
||||
if (!chars)
|
||||
return NULL;
|
||||
|
||||
INFLATE_STRING_BODY
|
||||
js_InflateStringToBuffer(cx, bytes, *length, chars, &dstlen);
|
||||
chars [dstlen] = 0;
|
||||
*length = dstlen;
|
||||
return chars;
|
||||
}
|
||||
|
||||
#define DEFLATE_STRING_BODY \
|
||||
for (i = 0; i < length; i++) \
|
||||
bytes[i] = (char) chars[i]; \
|
||||
bytes[i] = 0;
|
||||
/*
|
||||
* May be called with null cx by js_GetStringBytes, see below.
|
||||
*/
|
||||
char *
|
||||
js_DeflateString(JSContext *cx, const jschar *chars, size_t length)
|
||||
{
|
||||
size_t size = 0;
|
||||
char *bytes = NULL;
|
||||
if (!js_DeflateStringToBuffer (cx, chars, length, NULL, &size))
|
||||
return NULL;
|
||||
bytes = (char *) (cx ? JS_malloc(cx, size+1) : malloc(size+1));
|
||||
if (!bytes)
|
||||
return NULL;
|
||||
js_DeflateStringToBuffer (cx, chars, length, bytes, &size);
|
||||
bytes [size] = 0;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
void
|
||||
js_DeflateStringToBuffer(char *bytes, const jschar *chars, size_t length)
|
||||
JSBool
|
||||
js_DeflateStringToBuffer(JSContext *cx, const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
|
||||
{
|
||||
size_t i, utf8Len, dstlen = *dstlenp, origDstlen = dstlen;
|
||||
jschar c, c2;
|
||||
uint32 v;
|
||||
uint8 utf8buf[6];
|
||||
|
||||
if (!dst)
|
||||
dstlen = origDstlen = (size_t) -1;
|
||||
|
||||
while (srclen) {
|
||||
c = *src++;
|
||||
srclen--;
|
||||
if ((c >= 0xDC00) && (c <= 0xDFFF))
|
||||
goto badSurrogate;
|
||||
if (c < 0xD800 || c > 0xDBFF) {
|
||||
v = c;
|
||||
} else {
|
||||
if (srclen < 2)
|
||||
goto bufferTooSmall;
|
||||
c2 = *src++;
|
||||
srclen--;
|
||||
if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
|
||||
c = c2;
|
||||
goto badSurrogate;
|
||||
}
|
||||
v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
|
||||
}
|
||||
if (v < 0x0080) {
|
||||
// no encoding necessary - performance hack
|
||||
if (!dstlen)
|
||||
goto bufferTooSmall;
|
||||
if (dst)
|
||||
*dst++ = (char) v;
|
||||
utf8Len = 1;
|
||||
} else {
|
||||
utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
|
||||
if (utf8Len > dstlen)
|
||||
goto bufferTooSmall;
|
||||
if (dst) {
|
||||
for (i = 0; i < utf8Len; i++)
|
||||
*dst++ = (char) utf8buf [i];
|
||||
}
|
||||
}
|
||||
dstlen -= utf8Len;
|
||||
}
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
return JS_TRUE;
|
||||
|
||||
badSurrogate:
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx) {
|
||||
char buffer [10];
|
||||
JS_snprintf (buffer, 10, "0x%x", c);
|
||||
JS_ReportErrorFlagsAndNumber(cx,
|
||||
JSREPORT_ERROR,
|
||||
js_GetErrorMessage, NULL,
|
||||
JSMSG_BAD_SURROGATE_CHAR,
|
||||
buffer);
|
||||
}
|
||||
return JS_FALSE;
|
||||
|
||||
bufferTooSmall:
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx)
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
|
||||
JSBool
|
||||
js_InflateStringToBuffer(JSContext *cx, const char *src, size_t srclen, jschar* dst, size_t* dstlenp)
|
||||
{
|
||||
uint32 v;
|
||||
size_t offset = 0, j, n, dstlen = *dstlenp, origDstlen = dstlen;
|
||||
|
||||
if (!dst)
|
||||
dstlen = origDstlen = (size_t) -1;
|
||||
|
||||
while (srclen) {
|
||||
v = (uint8) *src;
|
||||
n = 1;
|
||||
if (v & 0x80) {
|
||||
while (v & (0x80 >> n))
|
||||
n++;
|
||||
if (n > srclen)
|
||||
goto bufferTooSmall;
|
||||
if (n == 1 || n > 6)
|
||||
goto badCharacter;
|
||||
for (j = 1; j < n; j++) {
|
||||
if ((src [j] & 0xC0) != 0x80)
|
||||
goto badCharacter;
|
||||
}
|
||||
v = Utf8ToOneUcs4Char(src, n);
|
||||
if (v >= 0x10000) {
|
||||
v -= 0x10000;
|
||||
if (v > 0xFFFFF || dstlen < 2) {
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx) {
|
||||
char buffer [10];
|
||||
JS_snprintf (buffer, 10, "0x%x", v + 0x10000);
|
||||
JS_ReportErrorFlagsAndNumber(cx,
|
||||
JSREPORT_ERROR,
|
||||
js_GetErrorMessage, NULL,
|
||||
JSMSG_UTF8_CHAR_TOO_LARGE,
|
||||
buffer);
|
||||
}
|
||||
return JS_FALSE;
|
||||
}
|
||||
if (dstlen < 2)
|
||||
goto bufferTooSmall;
|
||||
if (dst) {
|
||||
*dst++ = (jschar)((v >> 10) + 0xD800);
|
||||
v = (jschar)((v & 0x3FF) + 0xDC00);
|
||||
}
|
||||
dstlen--;
|
||||
}
|
||||
}
|
||||
if (!dstlen)
|
||||
goto bufferTooSmall;
|
||||
if (dst)
|
||||
*dst++ = (jschar) v;
|
||||
dstlen--;
|
||||
offset += n;
|
||||
src += n;
|
||||
srclen -= n;
|
||||
}
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
return JS_TRUE;
|
||||
|
||||
badCharacter:
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx) {
|
||||
char buffer [10];
|
||||
JS_snprintf (buffer, 10, "%d", offset);
|
||||
JS_ReportErrorFlagsAndNumber(cx,
|
||||
JSREPORT_ERROR,
|
||||
js_GetErrorMessage, NULL,
|
||||
JSMSG_MALFORMED_UTF8_CHAR,
|
||||
buffer);
|
||||
}
|
||||
return JS_FALSE;
|
||||
|
||||
bufferTooSmall:
|
||||
*dstlenp = (origDstlen - dstlen);
|
||||
if (cx)
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
JSBool
|
||||
js_InflateStringToBuffer(JSContext* cx, const char *bytes, size_t length, jschar *chars, size_t* charsLength)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
DEFLATE_STRING_BODY
|
||||
if (length > *charsLength) {
|
||||
for (i = 0; i < *charsLength; i++)
|
||||
chars[i] = (unsigned char) bytes[i];
|
||||
if (cx)
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < length; i++)
|
||||
chars[i] = (unsigned char) bytes[i];
|
||||
*charsLength = length;
|
||||
return JS_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
jschar *
|
||||
js_InflateString(JSContext *cx, const char *bytes, size_t *bytesLength)
|
||||
{
|
||||
jschar *chars;
|
||||
size_t i, length = *bytesLength;
|
||||
|
||||
chars = (jschar *) JS_malloc(cx, (length + 1) * sizeof(jschar));
|
||||
if (!chars) {
|
||||
*bytesLength = 0;
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < length; i++)
|
||||
chars[i] = (unsigned char) bytes[i];
|
||||
chars [length] = 0;
|
||||
*bytesLength = length;
|
||||
return chars;
|
||||
}
|
||||
|
||||
JSBool
|
||||
js_DeflateStringToBuffer(JSContext* cx, const jschar *chars, size_t length, char *bytes, size_t* bytesLength)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if (length > *bytesLength) {
|
||||
for (i = 0; i < *bytesLength; i++)
|
||||
bytes[i] = (char) chars[i];
|
||||
if (cx)
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BUFFER_TOO_SMALL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < length; i++)
|
||||
bytes[i] = (char) chars[i];
|
||||
*bytesLength = length;
|
||||
return JS_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2934,10 +3138,15 @@ js_DeflateString(JSContext *cx, const jschar *chars, size_t length)
|
|||
if (!bytes)
|
||||
return NULL;
|
||||
|
||||
DEFLATE_STRING_BODY
|
||||
for (i = 0; i < length; i++)
|
||||
bytes[i] = (char) chars[i];
|
||||
|
||||
bytes [length] = 0;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static JSHashTable *
|
||||
GetDeflatedStringCache(void)
|
||||
{
|
||||
|
@ -4357,8 +4566,8 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
|
|||
const jschar *unescapedSet2, jsval *rval)
|
||||
{
|
||||
size_t length, j, k, L;
|
||||
jschar *chars, C, C2;
|
||||
uint32 V;
|
||||
jschar *chars, c, c2;
|
||||
uint32 v;
|
||||
uint8 utf8buf[6];
|
||||
jschar hexBuf[4];
|
||||
static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
|
||||
|
@ -4378,19 +4587,19 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
|
|||
hexBuf[3] = 0;
|
||||
chars = JSSTRING_CHARS(str);
|
||||
for (k = 0; k < length; k++) {
|
||||
C = chars[k];
|
||||
if (js_strchr(unescapedSet, C) ||
|
||||
(unescapedSet2 && js_strchr(unescapedSet2, C))) {
|
||||
if (!AddCharsToURI(cx, R, &C, 1))
|
||||
c = chars[k];
|
||||
if (js_strchr(unescapedSet, c) ||
|
||||
(unescapedSet2 && js_strchr(unescapedSet2, c))) {
|
||||
if (!AddCharsToURI(cx, R, &c, 1))
|
||||
return JS_FALSE;
|
||||
} else {
|
||||
if ((C >= 0xDC00) && (C <= 0xDFFF)) {
|
||||
if ((c >= 0xDC00) && (c <= 0xDFFF)) {
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
|
||||
JSMSG_BAD_URI, NULL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
if (C < 0xD800 || C > 0xDBFF) {
|
||||
V = C;
|
||||
if (c < 0xD800 || c > 0xDBFF) {
|
||||
v = c;
|
||||
} else {
|
||||
k++;
|
||||
if (k == length) {
|
||||
|
@ -4398,15 +4607,15 @@ Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
|
|||
JSMSG_BAD_URI, NULL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
C2 = chars[k];
|
||||
if ((C2 < 0xDC00) || (C2 > 0xDFFF)) {
|
||||
c2 = chars[k];
|
||||
if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
|
||||
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
|
||||
JSMSG_BAD_URI, NULL);
|
||||
return JS_FALSE;
|
||||
}
|
||||
V = ((C - 0xD800) << 10) + (C2 - 0xDC00) + 0x10000;
|
||||
v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
|
||||
}
|
||||
L = OneUcs4ToUtf8Char(utf8buf, V);
|
||||
L = js_OneUcs4ToUtf8Char(utf8buf, v);
|
||||
for (j = 0; j < L; j++) {
|
||||
hexBuf[1] = HexDigits[utf8buf[j] >> 4];
|
||||
hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
|
||||
|
@ -4432,8 +4641,8 @@ static JSBool
|
|||
Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
|
||||
{
|
||||
size_t length, start, k;
|
||||
jschar *chars, C, H;
|
||||
uint32 V;
|
||||
jschar *chars, c, H;
|
||||
uint32 v;
|
||||
jsuint B;
|
||||
uint8 octets[6];
|
||||
JSString *R;
|
||||
|
@ -4451,8 +4660,8 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
|
|||
|
||||
chars = JSSTRING_CHARS(str);
|
||||
for (k = 0; k < length; k++) {
|
||||
C = chars[k];
|
||||
if (C == '%') {
|
||||
c = chars[k];
|
||||
if (c == '%') {
|
||||
start = k;
|
||||
if ((k + 2) >= length)
|
||||
goto bad;
|
||||
|
@ -4461,7 +4670,7 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
|
|||
B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
|
||||
k += 2;
|
||||
if (!(B & 0x80)) {
|
||||
C = (jschar)B;
|
||||
c = (jschar)B;
|
||||
} else {
|
||||
n = 1;
|
||||
while (B & (0x80 >> n))
|
||||
|
@ -4483,28 +4692,28 @@ Decode(JSContext *cx, JSString *str, const jschar *reservedSet, jsval *rval)
|
|||
k += 2;
|
||||
octets[j] = (char)B;
|
||||
}
|
||||
V = Utf8ToOneUcs4Char(octets, n);
|
||||
if (V >= 0x10000) {
|
||||
V -= 0x10000;
|
||||
if (V > 0xFFFFF)
|
||||
v = Utf8ToOneUcs4Char(octets, n);
|
||||
if (v >= 0x10000) {
|
||||
v -= 0x10000;
|
||||
if (v > 0xFFFFF)
|
||||
goto bad;
|
||||
C = (jschar)((V & 0x3FF) + 0xDC00);
|
||||
H = (jschar)((V >> 10) + 0xD800);
|
||||
c = (jschar)((v & 0x3FF) + 0xDC00);
|
||||
H = (jschar)((v >> 10) + 0xD800);
|
||||
if (!AddCharsToURI(cx, R, &H, 1))
|
||||
return JS_FALSE;
|
||||
} else {
|
||||
C = (jschar)V;
|
||||
c = (jschar)v;
|
||||
}
|
||||
}
|
||||
if (js_strchr(reservedSet, C)) {
|
||||
if (js_strchr(reservedSet, c)) {
|
||||
if (!AddCharsToURI(cx, R, &chars[start], (k - start + 1)))
|
||||
return JS_FALSE;
|
||||
} else {
|
||||
if (!AddCharsToURI(cx, R, &C, 1))
|
||||
if (!AddCharsToURI(cx, R, &c, 1))
|
||||
return JS_FALSE;
|
||||
}
|
||||
} else {
|
||||
if (!AddCharsToURI(cx, R, &C, 1))
|
||||
if (!AddCharsToURI(cx, R, &c, 1))
|
||||
return JS_FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -4582,8 +4791,8 @@ str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|||
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
|
||||
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
|
||||
*/
|
||||
static int
|
||||
OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char)
|
||||
int
|
||||
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char)
|
||||
{
|
||||
int utf8Length = 1;
|
||||
|
||||
|
|
|
@ -424,27 +424,33 @@ js_SkipWhiteSpace(const jschar *s);
|
|||
/*
|
||||
* Inflate bytes to JS chars and vice versa. Report out of memory via cx
|
||||
* and return null on error, otherwise return the jschar or byte vector that
|
||||
* was JS_malloc'ed.
|
||||
* was JS_malloc'ed. length is updated with the length of the new string in jschars.
|
||||
*/
|
||||
extern jschar *
|
||||
js_InflateString(JSContext *cx, const char *bytes, size_t length);
|
||||
js_InflateString(JSContext *cx, const char *bytes, size_t *length);
|
||||
|
||||
extern char *
|
||||
js_DeflateString(JSContext *cx, const jschar *chars, size_t length);
|
||||
|
||||
/*
|
||||
* Inflate bytes to JS chars into a buffer.
|
||||
* 'chars' must be large enough for 'length'+1 jschars.
|
||||
* 'chars' must be large enough for 'length' jschars.
|
||||
* The buffer is NOT null-terminated.
|
||||
* cx may be NULL, which means no errors are thrown.
|
||||
* The destination length needs to be initialized with the buffer size, takes the number of chars moved.
|
||||
*/
|
||||
extern void
|
||||
js_InflateStringToBuffer(jschar *chars, const char *bytes, size_t length);
|
||||
extern JSBool
|
||||
js_InflateStringToBuffer(JSContext* cx, const char *bytes, size_t length, jschar *chars, size_t* charsLength);
|
||||
|
||||
/*
|
||||
* Deflate JS chars to bytes into a buffer.
|
||||
* 'bytes' must be large enough for 'length'+1 chars.
|
||||
* 'bytes' must be large enough for 'length' chars.
|
||||
* The buffer is NOT null-terminated.
|
||||
* cx may be NULL, which means no errors are thrown.
|
||||
* The destination length needs to be initialized with the buffer size, takes the number of bytes moved.
|
||||
*/
|
||||
extern void
|
||||
js_DeflateStringToBuffer(char *bytes, const jschar *chars, size_t length);
|
||||
extern JSBool
|
||||
js_DeflateStringToBuffer(JSContext* cx, const jschar *chars, size_t charsLength, char *bytes, size_t* length);
|
||||
|
||||
/*
|
||||
* Associate bytes with str in the deflated string cache, returning true on
|
||||
|
@ -464,6 +470,13 @@ JSBool
|
|||
js_str_escape(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
||||
jsval *rval);
|
||||
|
||||
/*
|
||||
* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
|
||||
* least 6 bytes long. Return the number of UTF-8 bytes of data written.
|
||||
*/
|
||||
extern int
|
||||
js_OneUcs4ToUtf8Char(uint8 *utf8Buffer, uint32 ucs4Char);
|
||||
|
||||
JS_END_EXTERN_C
|
||||
|
||||
#endif /* jsstr_h___ */
|
||||
|
|
|
@ -1885,7 +1885,7 @@ ParseXMLSource(JSContext *cx, JSString *src)
|
|||
{
|
||||
jsval nsval;
|
||||
JSXMLNamespace *ns;
|
||||
size_t urilen, srclen, length, offset;
|
||||
size_t urilen, srclen, length, offset, dstlen;
|
||||
jschar *chars;
|
||||
const jschar *srcp, *endp;
|
||||
void *mark;
|
||||
|
@ -1917,16 +1917,20 @@ ParseXMLSource(JSContext *cx, JSString *src)
|
|||
if (!chars)
|
||||
return NULL;
|
||||
|
||||
js_InflateStringToBuffer(chars, prefix, constrlen(prefix));
|
||||
offset = constrlen(prefix);
|
||||
dstlen = length;
|
||||
js_InflateStringToBuffer(cx, prefix, constrlen(prefix), chars, &dstlen);
|
||||
offset = dstlen;
|
||||
js_strncpy(chars + offset, JSSTRING_CHARS(ns->uri), urilen);
|
||||
offset += urilen;
|
||||
js_InflateStringToBuffer(chars + offset, middle, constrlen(middle));
|
||||
offset += constrlen(middle);
|
||||
dstlen = length - offset + 1;
|
||||
js_InflateStringToBuffer(cx, middle, constrlen(middle), chars + offset, &dstlen);
|
||||
offset += dstlen;
|
||||
srcp = JSSTRING_CHARS(src);
|
||||
js_strncpy(chars + offset, srcp, srclen);
|
||||
offset += srclen;
|
||||
js_InflateStringToBuffer(chars + offset, suffix, constrlen(suffix));
|
||||
dstlen = length - offset + 1;
|
||||
js_InflateStringToBuffer(cx, suffix, constrlen(suffix), chars + offset, &dstlen);
|
||||
chars [offset + dstlen] = 0;
|
||||
|
||||
mark = JS_ARENA_MARK(&cx->tempPool);
|
||||
ts = js_NewBufferTokenStream(cx, chars, length);
|
||||
|
|
Загрузка…
Ссылка в новой задаче