зеркало из https://github.com/mozilla/pjs.git
r=norris,waldemar
Fixes for bugs#23607, 23608, 23610, 23612, 23613. Also, first cut at URI encode & decode routines.
This commit is contained in:
Родитель
13a6d7fb0c
Коммит
f3a0bbab33
|
@ -231,3 +231,4 @@ MSG_DEF(JSMSG_REDECLARED_VAR, 155, 2, JSEXN_TYPEERR, "redeclaration of {
|
|||
MSG_DEF(JSMSG_UNDECLARED_VAR, 156, 1, JSEXN_TYPEERR, "assignment to undeclared variable {0}")
|
||||
MSG_DEF(JSMSG_ANON_NO_RETURN_VALUE, 157, 0, JSEXN_TYPEERR, "anonymous function does not always return a value")
|
||||
MSG_DEF(JSMSG_DEPRECATED_USAGE, 158, 1, JSEXN_REFERENCEERR, "deprecated {0} usage")
|
||||
MSG_DEF(JSMSG_BAD_URI, 159, 0, JSEXN_URIERR, "malformed URI sequence")
|
||||
|
|
|
@ -1333,11 +1333,16 @@ js_IsIdentifier(JSString *str)
|
|||
n = str->length;
|
||||
s = str->chars;
|
||||
c = *s;
|
||||
if (n == 0 || !JS_ISIDENT(c))
|
||||
/*
|
||||
* We don't handle unicode escape sequences here
|
||||
* because they won't be in the input string.
|
||||
* (Right?)
|
||||
*/
|
||||
if (n == 0 || !JS_ISIDENT_START(c))
|
||||
return JS_FALSE;
|
||||
for (n--; n != 0; n--) {
|
||||
c = *++s;
|
||||
if (!JS_ISIDENT2(c))
|
||||
if (!JS_ISIDENT(c))
|
||||
return JS_FALSE;
|
||||
}
|
||||
return JS_TRUE;
|
||||
|
|
285
js/src/jsscan.c
285
js/src/jsscan.c
|
@ -276,126 +276,128 @@ GetChar(JSTokenStream *ts)
|
|||
if (ts->ungetpos != 0) {
|
||||
c = ts->ungetbuf[--ts->ungetpos];
|
||||
} else {
|
||||
if (ts->linebuf.ptr == ts->linebuf.limit) {
|
||||
len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
|
||||
if (len <= 0) {
|
||||
/* Fill ts->userbuf so that \r and \r\n convert to \n. */
|
||||
if (ts->file) {
|
||||
JSBool crflag;
|
||||
char cbuf[JS_LINE_LIMIT];
|
||||
jschar *ubuf;
|
||||
ptrdiff_t i, j;
|
||||
do {
|
||||
if (ts->linebuf.ptr == ts->linebuf.limit) {
|
||||
len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
|
||||
if (len <= 0) {
|
||||
/* Fill ts->userbuf so that \r and \r\n convert to \n. */
|
||||
if (ts->file) {
|
||||
JSBool crflag;
|
||||
char cbuf[JS_LINE_LIMIT];
|
||||
jschar *ubuf;
|
||||
ptrdiff_t i, j;
|
||||
|
||||
crflag = (ts->flags & TSF_CRFLAG) != 0;
|
||||
if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
|
||||
ts->flags |= TSF_EOF;
|
||||
return EOF;
|
||||
crflag = (ts->flags & TSF_CRFLAG) != 0;
|
||||
if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
|
||||
ts->flags |= TSF_EOF;
|
||||
return EOF;
|
||||
}
|
||||
len = olen = strlen(cbuf);
|
||||
JS_ASSERT(len > 0);
|
||||
ubuf = ts->userbuf.base;
|
||||
i = 0;
|
||||
if (crflag) {
|
||||
ts->flags &= ~TSF_CRFLAG;
|
||||
if (cbuf[0] != '\n') {
|
||||
ubuf[i++] = '\n';
|
||||
len++;
|
||||
ts->linepos--;
|
||||
}
|
||||
}
|
||||
for (j = 0; i < len; i++, j++)
|
||||
ubuf[i] = (jschar) (unsigned char) cbuf[j];
|
||||
ts->userbuf.limit = ubuf + len;
|
||||
ts->userbuf.ptr = ubuf;
|
||||
} else {
|
||||
ts->flags |= TSF_EOF;
|
||||
return EOF;
|
||||
}
|
||||
len = olen = strlen(cbuf);
|
||||
JS_ASSERT(len > 0);
|
||||
ubuf = ts->userbuf.base;
|
||||
i = 0;
|
||||
if (crflag) {
|
||||
ts->flags &= ~TSF_CRFLAG;
|
||||
if (cbuf[0] != '\n') {
|
||||
ubuf[i++] = '\n';
|
||||
len++;
|
||||
ts->linepos--;
|
||||
}
|
||||
}
|
||||
if (ts->listener)
|
||||
(*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
|
||||
&ts->listenerTSData, ts->listenerData);
|
||||
/*
|
||||
* Any one of \n, \r, or \r\n ends a line (longest match wins).
|
||||
*/
|
||||
for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
|
||||
if (*nl == '\n')
|
||||
break;
|
||||
if (*nl == '\r') {
|
||||
if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
|
||||
nl++;
|
||||
break;
|
||||
}
|
||||
for (j = 0; i < len; i++, j++)
|
||||
ubuf[i] = (jschar) (unsigned char) cbuf[j];
|
||||
ts->userbuf.limit = ubuf + len;
|
||||
ts->userbuf.ptr = ubuf;
|
||||
} else {
|
||||
ts->flags |= TSF_EOF;
|
||||
return EOF;
|
||||
}
|
||||
}
|
||||
if (ts->listener)
|
||||
(*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
|
||||
&ts->listenerTSData, ts->listenerData);
|
||||
/*
|
||||
* Any one of \n, \r, or \r\n ends a line (longest match wins).
|
||||
*/
|
||||
for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
|
||||
if (*nl == '\n')
|
||||
break;
|
||||
if (*nl == '\r') {
|
||||
if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
|
||||
nl++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If there was a line terminator, copy thru it into linebuf.
|
||||
* Else copy JS_LINE_LIMIT-1 bytes into linebuf.
|
||||
*/
|
||||
if (nl < ts->userbuf.limit)
|
||||
len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
|
||||
if (len >= JS_LINE_LIMIT)
|
||||
len = JS_LINE_LIMIT - 1;
|
||||
js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
|
||||
ts->userbuf.ptr += len;
|
||||
olen = len;
|
||||
/*
|
||||
* If there was a line terminator, copy thru it into linebuf.
|
||||
* Else copy JS_LINE_LIMIT-1 bytes into linebuf.
|
||||
*/
|
||||
if (nl < ts->userbuf.limit)
|
||||
len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
|
||||
if (len >= JS_LINE_LIMIT)
|
||||
len = JS_LINE_LIMIT - 1;
|
||||
js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
|
||||
ts->userbuf.ptr += len;
|
||||
olen = len;
|
||||
|
||||
/*
|
||||
* Make sure linebuf contains \n for EOL (don't do this in
|
||||
* userbuf because the user's string might be readonly).
|
||||
*/
|
||||
if (nl < ts->userbuf.limit) {
|
||||
if (*nl == '\r') {
|
||||
if (ts->linebuf.base[len-1] == '\r') {
|
||||
/*
|
||||
* Does the line segment end in \r? We must check for
|
||||
* a \n at the front of the next segment before storing
|
||||
* a \n into linebuf. This case only matters when we're
|
||||
* reading from a file.
|
||||
*/
|
||||
if (nl + 1 == ts->userbuf.limit && ts->file) {
|
||||
/*
|
||||
* Make sure linebuf contains \n for EOL (don't do this in
|
||||
* userbuf because the user's string might be readonly).
|
||||
*/
|
||||
if (nl < ts->userbuf.limit) {
|
||||
if (*nl == '\r') {
|
||||
if (ts->linebuf.base[len-1] == '\r') {
|
||||
/*
|
||||
* Does the line segment end in \r? We must check for
|
||||
* a \n at the front of the next segment before storing
|
||||
* a \n into linebuf. This case only matters when we're
|
||||
* reading from a file.
|
||||
*/
|
||||
if (nl + 1 == ts->userbuf.limit && ts->file) {
|
||||
len--;
|
||||
ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
|
||||
if (len == 0) {
|
||||
/*
|
||||
* This can happen when a segment ends in \r\r.
|
||||
* Start over. ptr == limit in this case, so
|
||||
* we'll fall into buffer-filling code.
|
||||
*/
|
||||
return GetChar(ts);
|
||||
}
|
||||
} else
|
||||
ts->linebuf.base[len-1] = '\n';
|
||||
}
|
||||
} else if (*nl == '\n') {
|
||||
if (nl > ts->userbuf.base &&
|
||||
nl[-1] == '\r' &&
|
||||
ts->linebuf.base[len-2] == '\r') {
|
||||
len--;
|
||||
ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
|
||||
if (len == 0) {
|
||||
/*
|
||||
* This can happen when a segment ends in \r\r.
|
||||
* Start over. ptr == limit in this case, so
|
||||
* we'll fall into buffer-filling code.
|
||||
*/
|
||||
return GetChar(ts);
|
||||
}
|
||||
} else
|
||||
ts->linebuf.base[len-1] = '\n';
|
||||
JS_ASSERT(ts->linebuf.base[len] == '\n');
|
||||
ts->linebuf.base[len-1] = '\n';
|
||||
}
|
||||
}
|
||||
} else if (*nl == '\n') {
|
||||
if (nl > ts->userbuf.base &&
|
||||
nl[-1] == '\r' &&
|
||||
ts->linebuf.base[len-2] == '\r') {
|
||||
len--;
|
||||
JS_ASSERT(ts->linebuf.base[len] == '\n');
|
||||
ts->linebuf.base[len-1] = '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Reset linebuf based on adjusted segment length. */
|
||||
ts->linebuf.limit = ts->linebuf.base + len;
|
||||
ts->linebuf.ptr = ts->linebuf.base;
|
||||
|
||||
/* Update position of linebuf within physical line in userbuf. */
|
||||
if (!(ts->flags & TSF_NLFLAG))
|
||||
ts->linepos += ts->linelen;
|
||||
else
|
||||
ts->linepos = 0;
|
||||
if (ts->linebuf.limit[-1] == '\n')
|
||||
ts->flags |= TSF_NLFLAG;
|
||||
else
|
||||
ts->flags &= ~TSF_NLFLAG;
|
||||
|
||||
/* Update linelen from original segment length. */
|
||||
ts->linelen = olen;
|
||||
}
|
||||
|
||||
/* Reset linebuf based on adjusted segment length. */
|
||||
ts->linebuf.limit = ts->linebuf.base + len;
|
||||
ts->linebuf.ptr = ts->linebuf.base;
|
||||
|
||||
/* Update position of linebuf within physical line in userbuf. */
|
||||
if (!(ts->flags & TSF_NLFLAG))
|
||||
ts->linepos += ts->linelen;
|
||||
else
|
||||
ts->linepos = 0;
|
||||
if (ts->linebuf.limit[-1] == '\n')
|
||||
ts->flags |= TSF_NLFLAG;
|
||||
else
|
||||
ts->flags &= ~TSF_NLFLAG;
|
||||
|
||||
/* Update linelen from original segment length. */
|
||||
ts->linelen = olen;
|
||||
}
|
||||
c = *ts->linebuf.ptr++;
|
||||
c = *ts->linebuf.ptr++;
|
||||
} while (JS_ISFORMAT(c));
|
||||
}
|
||||
if (c == '\n')
|
||||
ts->lineno++;
|
||||
|
@ -642,6 +644,30 @@ AddToTokenBuf(JSContext *cx, JSTokenBuf *tb, jschar c)
|
|||
return JS_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* We encountered a '\', check for a following unicode
|
||||
* escape sequence - returning it's value if so.
|
||||
* Otherwise, non-destructively return the original '\'.
|
||||
*/
|
||||
static int32
|
||||
getUnicodeEscape(JSTokenStream *ts)
|
||||
{
|
||||
jschar cp[5];
|
||||
int32 c;
|
||||
if (PeekChars(ts, 5, cp) && (cp[0] == 'u') &&
|
||||
JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
|
||||
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) {
|
||||
c = (((((JS7_UNHEX(cp[1]) << 4)
|
||||
+ JS7_UNHEX(cp[2])) << 4)
|
||||
+ JS7_UNHEX(cp[3])) << 4)
|
||||
+ JS7_UNHEX(cp[4]);
|
||||
SkipChars(ts, 5);
|
||||
}
|
||||
else
|
||||
c = '\\';
|
||||
return c;
|
||||
}
|
||||
|
||||
JSTokenType
|
||||
js_GetToken(JSContext *cx, JSTokenStream *ts)
|
||||
{
|
||||
|
@ -649,6 +675,7 @@ js_GetToken(JSContext *cx, JSTokenStream *ts)
|
|||
JSToken *tp;
|
||||
int32 c;
|
||||
JSAtom *atom;
|
||||
JSBool hadUnicodeEscape;
|
||||
|
||||
#define INIT_TOKENBUF(tb) ((tb)->ptr = (tb)->base)
|
||||
#define FINISH_TOKENBUF(tb) if (!AddToTokenBuf(cx, tb, 0)) RETURN(TOK_ERROR)
|
||||
|
@ -688,13 +715,26 @@ retry:
|
|||
if (c == EOF)
|
||||
RETURN(TOK_EOF);
|
||||
|
||||
if (JS_ISIDENT(c)) {
|
||||
hadUnicodeEscape = JS_FALSE;
|
||||
if (JS_ISIDENT_START(c)
|
||||
|| ((c == '\\')
|
||||
&& (c = getUnicodeEscape(ts),
|
||||
hadUnicodeEscape = JS_ISIDENT_START(c)))) {
|
||||
INIT_TOKENBUF(&ts->tokenbuf);
|
||||
do {
|
||||
for (;;) {
|
||||
if (!AddToTokenBuf(cx, &ts->tokenbuf, (jschar)c))
|
||||
RETURN(TOK_ERROR);
|
||||
c = GetChar(ts);
|
||||
} while (JS_ISIDENT2(c));
|
||||
if (c == '\\') {
|
||||
c = getUnicodeEscape(ts);
|
||||
if (JS_ISIDENT(c))
|
||||
hadUnicodeEscape = JS_TRUE;
|
||||
else
|
||||
break;
|
||||
}
|
||||
else
|
||||
if (!JS_ISIDENT(c)) break;
|
||||
}
|
||||
UngetChar(ts, c);
|
||||
FINISH_TOKENBUF(&ts->tokenbuf);
|
||||
|
||||
|
@ -704,13 +744,16 @@ retry:
|
|||
0);
|
||||
if (!atom)
|
||||
RETURN(TOK_ERROR);
|
||||
if (atom->kwindex >= 0) {
|
||||
struct keyword *kw;
|
||||
if (hadUnicodeEscape) /* Can never be a keyword, then. */
|
||||
atom->kwindex = -1;
|
||||
else
|
||||
if (atom->kwindex >= 0) {
|
||||
struct keyword *kw;
|
||||
|
||||
kw = &keywords[atom->kwindex];
|
||||
tp->t_op = (JSOp) kw->op;
|
||||
RETURN(kw->tokentype);
|
||||
}
|
||||
kw = &keywords[atom->kwindex];
|
||||
tp->t_op = (JSOp) kw->op;
|
||||
RETURN(kw->tokentype);
|
||||
}
|
||||
tp->t_op = JSOP_NAME;
|
||||
tp->t_atom = atom;
|
||||
RETURN(TOK_NAME);
|
||||
|
|
|
@ -153,7 +153,7 @@ struct JSTokenStream {
|
|||
uintN lookahead; /* count of lookahead tokens */
|
||||
uintN lineno; /* current line number */
|
||||
uintN ungetpos; /* next free char slot in ungetbuf */
|
||||
jschar ungetbuf[4]; /* at most 4, for \uXXXX lookahead */
|
||||
jschar ungetbuf[5]; /* at most 5, for \uXXXX lookahead */
|
||||
uintN flags; /* flags -- see below */
|
||||
ptrdiff_t linelen; /* physical linebuf segment length */
|
||||
ptrdiff_t linepos; /* linebuf offset in physical line */
|
||||
|
|
303
js/src/jsstr.c
303
js/src/jsstr.c
|
@ -67,6 +67,20 @@
|
|||
#include "jsinterp.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Forward declarations for URI encode/decode and helper routines
|
||||
*/
|
||||
static JSBool
|
||||
str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
|
||||
static JSBool
|
||||
str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
|
||||
static JSBool
|
||||
str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
|
||||
static JSBool
|
||||
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
|
||||
static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char);
|
||||
static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length);
|
||||
|
||||
/* Contributions from the String class to the set of methods defined for the
|
||||
* global object. escape and unescape used to be defined in the Mocha library,
|
||||
* but as ECMA decided to spec them, they've been moved to the core engine
|
||||
|
@ -260,6 +274,11 @@ static JSFunctionSpec string_functions[] = {
|
|||
#if JS_HAS_UNEVAL
|
||||
{"uneval", str_uneval, 1,0,0},
|
||||
#endif
|
||||
{"decodeURI", str_decodeURI, 1,0,0},
|
||||
{"encodeURI", str_encodeURI, 1,0,0},
|
||||
{"decodeURIComponent", str_decodeURI_Component, 1,0,0},
|
||||
{"encodeURIComponent", str_encodeURI_Component, 1,0,0},
|
||||
|
||||
{0,0,0,0,0}
|
||||
};
|
||||
|
||||
|
@ -865,7 +884,7 @@ match_or_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|||
ok = JS_TRUE;
|
||||
re->lastIndex = 0;
|
||||
for (count = 0; index <= str->length; count++) {
|
||||
ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);
|
||||
ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);
|
||||
if (!ok || *rval != JSVAL_TRUE)
|
||||
break;
|
||||
ok = glob(cx, count, data);
|
||||
|
@ -1066,6 +1085,12 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
|
|||
void *mark;
|
||||
JSStackFrame *fp;
|
||||
JSBool ok;
|
||||
/*
|
||||
* Save the rightContext from the current regexp, since it
|
||||
* gets stuck at the end of the replacement string and may
|
||||
* be clobbered by a RegExp usage in the lambda function.
|
||||
*/
|
||||
JSSubString saveRightContext = cx->regExpStatics.rightContext;
|
||||
|
||||
/*
|
||||
* In the lambda case, not only do we find the replacement string's
|
||||
|
@ -1143,6 +1168,7 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
|
|||
|
||||
lambda_out:
|
||||
js_FreeStack(cx, mark);
|
||||
cx->regExpStatics.rightContext = saveRightContext;
|
||||
return ok;
|
||||
}
|
||||
#endif /* JS_HAS_REPLACE_LAMBDA */
|
||||
|
@ -1264,8 +1290,10 @@ str_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|||
rdata.length = 0;
|
||||
rdata.index = 0;
|
||||
rdata.leftIndex = 0;
|
||||
/* for ECMA 3, the first argument is to be treated as a string
|
||||
(i.e. converted to one if necessary) UNLESS it's a reg.exp object */
|
||||
/*
|
||||
* For ECMA 3, the first argument is to be treated as a string
|
||||
* (i.e. converted to one if necessary) UNLESS it's a reg.exp object.
|
||||
*/
|
||||
if (!match_or_replace(cx, obj, argc, argv, replace_glob, &rdata.base, rval,
|
||||
(cx->version == JSVERSION_DEFAULT || cx->version > JSVERSION_1_4)))
|
||||
return JS_FALSE;
|
||||
|
@ -2643,7 +2671,7 @@ const uint8 js_Y[] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
|
||||
0, 1, 1, 1, 1, 1, 0, 0, /* 0 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
|
||||
0, 0, 0, 0, 1, 1, 1, 1, /* 0 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
|
||||
2, 3, 3, 3, 4, 3, 3, 3, /* 0 */
|
||||
5, 6, 3, 7, 3, 8, 3, 3, /* 0 */
|
||||
9, 9, 9, 9, 9, 9, 9, 9, /* 0 */
|
||||
|
@ -2660,7 +2688,7 @@ const uint8 js_Y[] = {
|
|||
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
|
||||
14, 3, 4, 4, 4, 4, 15, 15, /* 2 */
|
||||
2, 3, 4, 4, 4, 4, 15, 15, /* 2 */
|
||||
11, 15, 16, 5, 7, 8, 15, 11, /* 2 */
|
||||
15, 7, 17, 17, 11, 16, 15, 3, /* 2 */
|
||||
11, 18, 16, 6, 19, 19, 19, 3, /* 2 */
|
||||
|
@ -3762,3 +3790,268 @@ js_ToLower(jschar c)
|
|||
return (v & 0x00200000) ? c + ((int32)v >> 22) : c;
|
||||
}
|
||||
#endif /* !__GNUC__ */
|
||||
|
||||
char *uriReservedPlusPound = ";/?:@&=+$,#";
|
||||
char *uriUnescaped = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.!~*'()";
|
||||
|
||||
/* concatenate jschars onto a JSString */
|
||||
static JSBool
|
||||
add_chars(JSContext *cx, JSString *str, jschar *chars, size_t length)
|
||||
{
|
||||
str->chars = JS_realloc(cx, str->chars, (str->length + length + 1) * sizeof(jschar));
|
||||
if (!str->chars)
|
||||
return JS_FALSE;
|
||||
js_strncpy(str->chars + str->length, chars, length);
|
||||
str->length += length;
|
||||
return JS_TRUE;
|
||||
}
|
||||
|
||||
/* concatenate chars onto a JSString */
|
||||
static JSBool
|
||||
add_bytes(JSContext *cx, JSString *str, char *bytes, size_t length)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
str->chars = JS_realloc(cx, str->chars, (str->length + length + 1) * sizeof(jschar));
|
||||
if (!str->chars)
|
||||
return JS_FALSE;
|
||||
for (i = 0; i < length; i++)
|
||||
str->chars[str->length + i] = (unsigned char)bytes[i];
|
||||
str->chars[str->length + length] = 0;
|
||||
str->length += length;
|
||||
return JS_TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* ECMA 3, 15.1.3 URI Handling Function Properties
|
||||
*
|
||||
* The following are implementations of the algorithms
|
||||
* given in the ECMA specification for the hidden functions
|
||||
* 'Encode' and 'Decode'.
|
||||
*/
|
||||
/*
 * Build a new string from `str` in which every character not found in
 * `unescapedSet` is replaced by the %XX escapes of its UTF-8 encoding, and
 * store that string in *rval.
 *
 * A high surrogate (0xD800..0xDBFF) must be followed by a low surrogate
 * (0xDC00..0xDFFF); the pair is combined into one code point before being
 * converted.  A lone low surrogate, or a high surrogate that is last in the
 * string or followed by anything else, is reported as JSMSG_BAD_URI.
 *
 * Returns JS_FALSE after a reported URI error or when a string allocation
 * fails, JS_TRUE otherwise.
 */
static JSBool encode(JSContext *cx, JSString *str, JSString *unescapedSet, jsval *rval)
{
    size_t pos, i, nbytes;
    jschar unit, low;
    uint32 codePoint;
    unsigned char utf8[6];
    char escape[4];             /* '%', two hex digits, NUL */
    JSString *result;

    result = js_NewString(cx, NULL, 0, 0);
    if (!result)
        return JS_FALSE;

    for (pos = 0; pos < str->length; pos++) {
        unit = str->chars[pos];

        /* Characters in the unescaped set are copied through verbatim. */
        if (js_strchr(unescapedSet->chars, unit)) {
            if (!add_chars(cx, result, &unit, 1))
                return JS_FALSE;
            continue;
        }

        if (unit >= 0xDC00 && unit <= 0xDFFF)
            goto bad_uri;

        if (unit >= 0xD800 && unit <= 0xDBFF) {
            /* Consume the second half of the surrogate pair. */
            pos++;
            if (pos == str->length)
                goto bad_uri;
            low = str->chars[pos];
            if (low < 0xDC00 || low > 0xDFFF)
                goto bad_uri;
            codePoint = ((unit - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
        } else {
            codePoint = unit;
        }

        nbytes = oneUcs4ToUtf8Char(utf8, codePoint);
        for (i = 0; i < nbytes; i++) {
            sprintf(escape, "%%%.2X", utf8[i]);
            if (!add_bytes(cx, result, escape, 3))
                return JS_FALSE;
        }
    }

    *rval = STRING_TO_JSVAL(result);
    return JS_TRUE;

bad_uri:
    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                         JSMSG_BAD_URI, NULL);
    return JS_FALSE;
}
|
||||
|
||||
/*
 * Build a new string from `str` in which each run of %XX escapes is replaced
 * by the character its UTF-8 octets encode, and store that string in *rval.
 *
 * An escape whose first octet has the high bit clear stands for that octet's
 * value.  Otherwise the count of leading 1 bits (n) gives the sequence
 * length; n must be 2..6, and the n-1 following escapes must each be a %XX
 * whose value has the form 10xxxxxx.  Values of 0x10000 and above are
 * emitted as a surrogate pair; anything beyond 0x10FFFF is rejected.
 *
 * If the decoded character is in `reservedSet`, the original escape text is
 * copied to the output instead of the character.
 *
 * Any malformed escape is reported as JSMSG_BAD_URI.  Returns JS_FALSE after
 * such a report or when a string allocation fails, JS_TRUE otherwise.
 */
static JSBool decode(JSContext *cx, JSString *str, JSString *reservedSet, jsval *rval)
{
    size_t start, k = 0;
    jschar C, H;
    uint32 V;
    uintN B;                    /* value of the escape being examined */
    unsigned char octets[6];
    JSString *R;
    int16 j, n;

    R = js_NewString(cx, NULL, 0, 0);
    if (!R)
        return JS_FALSE;

    while (k < str->length) {
        C = str->chars[k];
        if (C == '%') {
            start = k;
            /* Need two hex digits after the '%'. */
            if ((k + 2) >= str->length)
                goto errOut;
            if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2]))
                goto errOut;
            B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
            k += 2;             /* k now indexes the last hex digit */
            if (!(B & 0x80)) {
                C = B;
            } else {
                /* Count the leading 1 bits of the first octet. */
                n = 1;
                while (B & (0x80 >> n))
                    n++;
                if ((n == 1) || (n > 6))
                    goto errOut;
                octets[0] = (unsigned char)B;
                /* The remaining n-1 escapes take three characters each. */
                if ((k + 3 * (n - 1)) >= str->length)
                    goto errOut;
                for (j = 1; j < n; j++) {
                    k++;
                    if (str->chars[k] != '%')
                        goto errOut;
                    if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2]))
                        goto errOut;
                    B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
                    if ((B & 0xC0) != 0x80)
                        goto errOut;
                    k += 2;
                    octets[j] = (unsigned char)B;
                }
                V = utf8ToOneUcs4Char(octets, n);
                if (V >= 0x10000) {
                    V -= 0x10000;
                    if (V > 0xFFFFF)
                        goto errOut;
                    /*
                     * Emit the high surrogate now; the low surrogate is left
                     * in C and appended by the common code below.
                     */
                    C = (jschar)((V & 0x3FF) + 0xDC00);
                    H = (jschar)((V >> 10) + 0xD800);
                    if (!add_chars(cx, R, &H, 1))
                        return JS_FALSE;
                } else {
                    C = (jschar)V;
                }
            }
            if (js_strchr(reservedSet->chars, C)) {
                /* Reserved: keep the escape text str[start..k] as written. */
                if (!add_chars(cx, R, &str->chars[start], (k - start + 1)))
                    return JS_FALSE;
            } else {
                if (!add_chars(cx, R, &C, 1))
                    return JS_FALSE;
            }
        } else {
            if (!add_chars(cx, R, &C, 1))
                return JS_FALSE;
        }
        k++;
    }
    *rval = STRING_TO_JSVAL(R);
    return JS_TRUE;

errOut:
    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                         JSMSG_BAD_URI, NULL);
    return JS_FALSE;
}
|
||||
|
||||
static JSBool
|
||||
str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
JSString *str, *reservedURISet;
|
||||
str = js_ValueToString(cx, argv[0]);
|
||||
if (!str)
|
||||
return JS_FALSE;
|
||||
reservedURISet = JS_NewStringCopyZ(cx, uriReservedPlusPound);
|
||||
if (!reservedURISet)
|
||||
return JS_FALSE;
|
||||
return decode(cx, str, reservedURISet, rval);
|
||||
}
|
||||
|
||||
static JSBool
|
||||
str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
JSString *str;
|
||||
str = js_ValueToString(cx, argv[0]);
|
||||
if (!str)
|
||||
return JS_FALSE;
|
||||
return decode(cx, str, cx->runtime->emptyString, rval);
|
||||
}
|
||||
|
||||
static JSBool
|
||||
str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
JSString *str, *unescapedURISet;
|
||||
str = js_ValueToString(cx, argv[0]);
|
||||
if (!str)
|
||||
return JS_FALSE;
|
||||
unescapedURISet = JS_NewStringCopyZ(cx, uriReservedPlusPound);
|
||||
if (!unescapedURISet)
|
||||
return JS_FALSE;
|
||||
if (!add_bytes(cx, unescapedURISet, uriUnescaped, strlen(uriUnescaped)))
|
||||
return JS_FALSE;
|
||||
return encode(cx, str, unescapedURISet, rval);
|
||||
}
|
||||
|
||||
static JSBool
|
||||
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
||||
{
|
||||
JSString *str, *unescapedURISet;
|
||||
str = js_ValueToString(cx, argv[0]);
|
||||
if (!str)
|
||||
return JS_FALSE;
|
||||
unescapedURISet = JS_NewStringCopyZ(cx, uriUnescaped);
|
||||
if (!unescapedURISet)
|
||||
return JS_FALSE;
|
||||
return encode(cx, str, unescapedURISet, rval);
|
||||
}
|
||||
|
||||
|
||||
/* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be
|
||||
* at least 6 bytes long. Return the number of UTF-8 bytes of data written.
|
||||
*/
|
||||
static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char)
|
||||
{
|
||||
int utf8Length = 1;
|
||||
|
||||
JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
|
||||
if (ucs4Char < 0x80)
|
||||
*utf8Buffer = (unsigned char)ucs4Char;
|
||||
else {
|
||||
int i;
|
||||
uint32 a = ucs4Char >> 11;
|
||||
utf8Length = 2;
|
||||
while (a) {
|
||||
a >>= 5;
|
||||
utf8Length++;
|
||||
}
|
||||
i = utf8Length;
|
||||
while (--i) {
|
||||
utf8Buffer[i] = (unsigned char)(ucs4Char & 0x3F | 0x80);
|
||||
ucs4Char >>= 6;
|
||||
}
|
||||
*utf8Buffer = (unsigned char)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
|
||||
}
|
||||
return utf8Length;
|
||||
}
|
||||
|
||||
|
||||
/* Convert a utf8 character sequence into a UCS-4 character and return that
|
||||
* character. It is assumed that the caller already checked that the sequence is valid.
|
||||
*/
|
||||
static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length)
|
||||
{
|
||||
uint32 ucs4Char;
|
||||
|
||||
JS_ASSERT(utf8Length >= 1 && utf8Length <= 6);
|
||||
if (utf8Length == 1) {
|
||||
ucs4Char = *utf8Buffer;
|
||||
JS_ASSERT(!(ucs4Char & 0x80));
|
||||
} else {
|
||||
JS_ASSERT((*utf8Buffer & (0x100 - (1 << (7-utf8Length)))) == (0x100 - (1 << (8-utf8Length))));
|
||||
ucs4Char = *utf8Buffer++ & (1<<(7-utf8Length))-1;
|
||||
while (--utf8Length) {
|
||||
JS_ASSERT((*utf8Buffer & 0xC0) == 0x80);
|
||||
ucs4Char = ucs4Char<<6 | *utf8Buffer++ & 0x3F;
|
||||
}
|
||||
}
|
||||
return ucs4Char;
|
||||
}
|
||||
|
|
|
@ -122,11 +122,39 @@ typedef enum JSCharType {
|
|||
(1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
|
||||
>> JS_CTYPE(c)) & 1)
|
||||
|
||||
/*
 * Bit set of the JSCT_* character types accepted by JS_ISUC_LETTER: the
 * upper/lower/title-case, modifier and other letters, plus letter numbers.
 */
#define JS_UC_LETTER_CTYPES     ((1 << JSCT_UPPERCASE_LETTER) |               \
                                 (1 << JSCT_LOWERCASE_LETTER) |               \
                                 (1 << JSCT_TITLECASE_LETTER) |               \
                                 (1 << JSCT_MODIFIER_LETTER) |                \
                                 (1 << JSCT_OTHER_LETTER) |                   \
                                 (1 << JSCT_LETTER_NUMBER))

/* 1 if c is a Unicode letter usable in an identifier, else 0. */
#define JS_ISUC_LETTER(c)       ((JS_UC_LETTER_CTYPES >> JS_CTYPE(c)) & 1)
|
||||
|
||||
/*
 * Bit set of the JSCT_* character types accepted by JS_ISID_PART: every
 * letter type, letter numbers, non-spacing and combining spacing marks,
 * decimal digits and connector punctuation.
 */
#define JS_ID_PART_CTYPES       ((1 << JSCT_UPPERCASE_LETTER) |               \
                                 (1 << JSCT_LOWERCASE_LETTER) |               \
                                 (1 << JSCT_TITLECASE_LETTER) |               \
                                 (1 << JSCT_MODIFIER_LETTER) |                \
                                 (1 << JSCT_OTHER_LETTER) |                   \
                                 (1 << JSCT_LETTER_NUMBER) |                  \
                                 (1 << JSCT_NON_SPACING_MARK) |               \
                                 (1 << JSCT_COMBINING_SPACING_MARK) |         \
                                 (1 << JSCT_DECIMAL_DIGIT_NUMBER) |           \
                                 (1 << JSCT_CONNECTOR_PUNCTUATION))

/*
 * 'IdentifierPart' from the ECMA grammar: 1 if c is a Unicode letter,
 * combining mark, digit or connector punctuation, else 0.
 */
#define JS_ISID_PART(c)         ((JS_ID_PART_CTYPES >> JS_CTYPE(c)) & 1)
|
||||
|
||||
/* Unicode control-format characters, ignored in input */
|
||||
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
|
||||
|
||||
#define JS_ISWORD(c) (JS_ISALNUM(c) || (c) == '_')
|
||||
|
||||
/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
|
||||
#define JS_ISIDENT(c) ((c) < 128 && (isalpha(c) || (c) == '_' || (c) == '$'))
|
||||
#define JS_ISIDENT2(c) ((c) < 128 && (isalnum(c) || (c) == '_' || (c) == '$'))
|
||||
#define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
|
||||
#define JS_ISIDENT(c) (JS_ISID_PART(c) || (c) == '_' || (c) == '$')
|
||||
|
||||
#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче