Fixes for bugs#23607, 23608, 23610, 23612, 23613. Also, first cut at URI
encode & decode routines.
This commit is contained in:
rogerl%netscape.com 2000-01-26 22:15:06 +00:00
Родитель 13a6d7fb0c
Коммит f3a0bbab33
6 изменённых файлов: 501 добавлений и 131 удалений

Просмотреть файл

@ -231,3 +231,4 @@ MSG_DEF(JSMSG_REDECLARED_VAR, 155, 2, JSEXN_TYPEERR, "redeclaration of {
MSG_DEF(JSMSG_UNDECLARED_VAR, 156, 1, JSEXN_TYPEERR, "assignment to undeclared variable {0}")
MSG_DEF(JSMSG_ANON_NO_RETURN_VALUE, 157, 0, JSEXN_TYPEERR, "anonymous function does not always return a value")
MSG_DEF(JSMSG_DEPRECATED_USAGE, 158, 1, JSEXN_REFERENCEERR, "deprecated {0} usage")
MSG_DEF(JSMSG_BAD_URI, 159, 0, JSEXN_URIERR, "malformed URI sequence")

Просмотреть файл

@ -1333,11 +1333,16 @@ js_IsIdentifier(JSString *str)
n = str->length;
s = str->chars;
c = *s;
if (n == 0 || !JS_ISIDENT(c))
/*
* We don't handle unicode escape sequences here
* because they won't be in the input string.
* (Right?)
*/
if (n == 0 || !JS_ISIDENT_START(c))
return JS_FALSE;
for (n--; n != 0; n--) {
c = *++s;
if (!JS_ISIDENT2(c))
if (!JS_ISIDENT(c))
return JS_FALSE;
}
return JS_TRUE;

Просмотреть файл

@ -276,126 +276,128 @@ GetChar(JSTokenStream *ts)
if (ts->ungetpos != 0) {
c = ts->ungetbuf[--ts->ungetpos];
} else {
if (ts->linebuf.ptr == ts->linebuf.limit) {
len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
if (len <= 0) {
/* Fill ts->userbuf so that \r and \r\n convert to \n. */
if (ts->file) {
JSBool crflag;
char cbuf[JS_LINE_LIMIT];
jschar *ubuf;
ptrdiff_t i, j;
do {
if (ts->linebuf.ptr == ts->linebuf.limit) {
len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
if (len <= 0) {
/* Fill ts->userbuf so that \r and \r\n convert to \n. */
if (ts->file) {
JSBool crflag;
char cbuf[JS_LINE_LIMIT];
jschar *ubuf;
ptrdiff_t i, j;
crflag = (ts->flags & TSF_CRFLAG) != 0;
if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
ts->flags |= TSF_EOF;
return EOF;
crflag = (ts->flags & TSF_CRFLAG) != 0;
if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
ts->flags |= TSF_EOF;
return EOF;
}
len = olen = strlen(cbuf);
JS_ASSERT(len > 0);
ubuf = ts->userbuf.base;
i = 0;
if (crflag) {
ts->flags &= ~TSF_CRFLAG;
if (cbuf[0] != '\n') {
ubuf[i++] = '\n';
len++;
ts->linepos--;
}
}
for (j = 0; i < len; i++, j++)
ubuf[i] = (jschar) (unsigned char) cbuf[j];
ts->userbuf.limit = ubuf + len;
ts->userbuf.ptr = ubuf;
} else {
ts->flags |= TSF_EOF;
return EOF;
}
len = olen = strlen(cbuf);
JS_ASSERT(len > 0);
ubuf = ts->userbuf.base;
i = 0;
if (crflag) {
ts->flags &= ~TSF_CRFLAG;
if (cbuf[0] != '\n') {
ubuf[i++] = '\n';
len++;
ts->linepos--;
}
}
if (ts->listener)
(*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
&ts->listenerTSData, ts->listenerData);
/*
* Any one of \n, \r, or \r\n ends a line (longest match wins).
*/
for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
if (*nl == '\n')
break;
if (*nl == '\r') {
if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
nl++;
break;
}
for (j = 0; i < len; i++, j++)
ubuf[i] = (jschar) (unsigned char) cbuf[j];
ts->userbuf.limit = ubuf + len;
ts->userbuf.ptr = ubuf;
} else {
ts->flags |= TSF_EOF;
return EOF;
}
}
if (ts->listener)
(*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
&ts->listenerTSData, ts->listenerData);
/*
* Any one of \n, \r, or \r\n ends a line (longest match wins).
*/
for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
if (*nl == '\n')
break;
if (*nl == '\r') {
if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
nl++;
break;
}
}
}
/*
* If there was a line terminator, copy thru it into linebuf.
* Else copy JS_LINE_LIMIT-1 bytes into linebuf.
*/
if (nl < ts->userbuf.limit)
len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
if (len >= JS_LINE_LIMIT)
len = JS_LINE_LIMIT - 1;
js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
ts->userbuf.ptr += len;
olen = len;
/*
* If there was a line terminator, copy thru it into linebuf.
* Else copy JS_LINE_LIMIT-1 bytes into linebuf.
*/
if (nl < ts->userbuf.limit)
len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
if (len >= JS_LINE_LIMIT)
len = JS_LINE_LIMIT - 1;
js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
ts->userbuf.ptr += len;
olen = len;
/*
* Make sure linebuf contains \n for EOL (don't do this in
* userbuf because the user's string might be readonly).
*/
if (nl < ts->userbuf.limit) {
if (*nl == '\r') {
if (ts->linebuf.base[len-1] == '\r') {
/*
* Does the line segment end in \r? We must check for
* a \n at the front of the next segment before storing
* a \n into linebuf. This case only matters when we're
* reading from a file.
*/
if (nl + 1 == ts->userbuf.limit && ts->file) {
/*
* Make sure linebuf contains \n for EOL (don't do this in
* userbuf because the user's string might be readonly).
*/
if (nl < ts->userbuf.limit) {
if (*nl == '\r') {
if (ts->linebuf.base[len-1] == '\r') {
/*
* Does the line segment end in \r? We must check for
* a \n at the front of the next segment before storing
* a \n into linebuf. This case only matters when we're
* reading from a file.
*/
if (nl + 1 == ts->userbuf.limit && ts->file) {
len--;
ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
if (len == 0) {
/*
* This can happen when a segment ends in \r\r.
* Start over. ptr == limit in this case, so
* we'll fall into buffer-filling code.
*/
return GetChar(ts);
}
} else
ts->linebuf.base[len-1] = '\n';
}
} else if (*nl == '\n') {
if (nl > ts->userbuf.base &&
nl[-1] == '\r' &&
ts->linebuf.base[len-2] == '\r') {
len--;
ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
if (len == 0) {
/*
* This can happen when a segment ends in \r\r.
* Start over. ptr == limit in this case, so
* we'll fall into buffer-filling code.
*/
return GetChar(ts);
}
} else
ts->linebuf.base[len-1] = '\n';
JS_ASSERT(ts->linebuf.base[len] == '\n');
ts->linebuf.base[len-1] = '\n';
}
}
} else if (*nl == '\n') {
if (nl > ts->userbuf.base &&
nl[-1] == '\r' &&
ts->linebuf.base[len-2] == '\r') {
len--;
JS_ASSERT(ts->linebuf.base[len] == '\n');
ts->linebuf.base[len-1] = '\n';
}
}
}
/* Reset linebuf based on adjusted segment length. */
ts->linebuf.limit = ts->linebuf.base + len;
ts->linebuf.ptr = ts->linebuf.base;
/* Update position of linebuf within physical line in userbuf. */
if (!(ts->flags & TSF_NLFLAG))
ts->linepos += ts->linelen;
else
ts->linepos = 0;
if (ts->linebuf.limit[-1] == '\n')
ts->flags |= TSF_NLFLAG;
else
ts->flags &= ~TSF_NLFLAG;
/* Update linelen from original segment length. */
ts->linelen = olen;
}
/* Reset linebuf based on adjusted segment length. */
ts->linebuf.limit = ts->linebuf.base + len;
ts->linebuf.ptr = ts->linebuf.base;
/* Update position of linebuf within physical line in userbuf. */
if (!(ts->flags & TSF_NLFLAG))
ts->linepos += ts->linelen;
else
ts->linepos = 0;
if (ts->linebuf.limit[-1] == '\n')
ts->flags |= TSF_NLFLAG;
else
ts->flags &= ~TSF_NLFLAG;
/* Update linelen from original segment length. */
ts->linelen = olen;
}
c = *ts->linebuf.ptr++;
c = *ts->linebuf.ptr++;
} while (JS_ISFORMAT(c));
}
if (c == '\n')
ts->lineno++;
@ -642,6 +644,30 @@ AddToTokenBuf(JSContext *cx, JSTokenBuf *tb, jschar c)
return JS_TRUE;
}
/*
 * Called once a '\' has been read from the token stream. Peeks at the next
 * five characters; if they form a unicode escape (a 'u' followed by four
 * hex digits) the escape is consumed and its 16-bit value is returned.
 * Otherwise nothing is consumed and the original '\' is returned.
 */
static int32
getUnicodeEscape(JSTokenStream *ts)
{
    jschar look[5];
    int32 value;
    int i;

    if (!PeekChars(ts, 5, look) || look[0] != 'u')
        return '\\';

    /* Fold the four hex digits, most significant first. */
    value = 0;
    for (i = 1; i <= 4; i++) {
        if (!JS7_ISHEX(look[i]))
            return '\\';
        value = (value << 4) + JS7_UNHEX(look[i]);
    }

    /* A complete escape was seen: drop the 'u' and the four digits. */
    SkipChars(ts, 5);
    return value;
}
JSTokenType
js_GetToken(JSContext *cx, JSTokenStream *ts)
{
@ -649,6 +675,7 @@ js_GetToken(JSContext *cx, JSTokenStream *ts)
JSToken *tp;
int32 c;
JSAtom *atom;
JSBool hadUnicodeEscape;
#define INIT_TOKENBUF(tb) ((tb)->ptr = (tb)->base)
#define FINISH_TOKENBUF(tb) if (!AddToTokenBuf(cx, tb, 0)) RETURN(TOK_ERROR)
@ -688,13 +715,26 @@ retry:
if (c == EOF)
RETURN(TOK_EOF);
if (JS_ISIDENT(c)) {
hadUnicodeEscape = JS_FALSE;
if (JS_ISIDENT_START(c)
|| ((c == '\\')
&& (c = getUnicodeEscape(ts),
hadUnicodeEscape = JS_ISIDENT_START(c)))) {
INIT_TOKENBUF(&ts->tokenbuf);
do {
for (;;) {
if (!AddToTokenBuf(cx, &ts->tokenbuf, (jschar)c))
RETURN(TOK_ERROR);
c = GetChar(ts);
} while (JS_ISIDENT2(c));
if (c == '\\') {
c = getUnicodeEscape(ts);
if (JS_ISIDENT(c))
hadUnicodeEscape = JS_TRUE;
else
break;
}
else
if (!JS_ISIDENT(c)) break;
}
UngetChar(ts, c);
FINISH_TOKENBUF(&ts->tokenbuf);
@ -704,13 +744,16 @@ retry:
0);
if (!atom)
RETURN(TOK_ERROR);
if (atom->kwindex >= 0) {
struct keyword *kw;
if (hadUnicodeEscape) /* Can never be a keyword, then. */
atom->kwindex = -1;
else
if (atom->kwindex >= 0) {
struct keyword *kw;
kw = &keywords[atom->kwindex];
tp->t_op = (JSOp) kw->op;
RETURN(kw->tokentype);
}
kw = &keywords[atom->kwindex];
tp->t_op = (JSOp) kw->op;
RETURN(kw->tokentype);
}
tp->t_op = JSOP_NAME;
tp->t_atom = atom;
RETURN(TOK_NAME);

Просмотреть файл

@ -153,7 +153,7 @@ struct JSTokenStream {
uintN lookahead; /* count of lookahead tokens */
uintN lineno; /* current line number */
uintN ungetpos; /* next free char slot in ungetbuf */
jschar ungetbuf[4]; /* at most 4, for \uXXXX lookahead */
jschar ungetbuf[5]; /* at most 5, for \uXXXX lookahead */
uintN flags; /* flags -- see below */
ptrdiff_t linelen; /* physical linebuf segment length */
ptrdiff_t linepos; /* linebuf offset in physical line */

Просмотреть файл

@ -67,6 +67,20 @@
#include "jsinterp.h"
#endif
/*
* Forward declarations for URI encode/decode and helper routines
*/
static JSBool
str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
static JSBool
str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
static JSBool
str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
static JSBool
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char);
static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length);
/* Contributions from the String class to the set of methods defined for the
* global object. escape and unescape used to be defined in the Mocha library,
* but as ECMA decided to spec them, they've been moved to the core engine
@ -260,6 +274,11 @@ static JSFunctionSpec string_functions[] = {
#if JS_HAS_UNEVAL
{"uneval", str_uneval, 1,0,0},
#endif
{"decodeURI", str_decodeURI, 1,0,0},
{"encodeURI", str_encodeURI, 1,0,0},
{"decodeURIComponent", str_decodeURI_Component, 1,0,0},
{"encodeURIComponent", str_encodeURI_Component, 1,0,0},
{0,0,0,0,0}
};
@ -865,7 +884,7 @@ match_or_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
ok = JS_TRUE;
re->lastIndex = 0;
for (count = 0; index <= str->length; count++) {
ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);
ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);
if (!ok || *rval != JSVAL_TRUE)
break;
ok = glob(cx, count, data);
@ -1066,6 +1085,12 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
void *mark;
JSStackFrame *fp;
JSBool ok;
/*
* Save the rightContext from the current regexp, since it
* gets stuck at the end of the replacement string and may
* be clobbered by a RegExp usage in the lambda function.
*/
JSSubString saveRightContext = cx->regExpStatics.rightContext;
/*
* In the lambda case, not only do we find the replacement string's
@ -1143,6 +1168,7 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
lambda_out:
js_FreeStack(cx, mark);
cx->regExpStatics.rightContext = saveRightContext;
return ok;
}
#endif /* JS_HAS_REPLACE_LAMBDA */
@ -1264,8 +1290,10 @@ str_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
rdata.length = 0;
rdata.index = 0;
rdata.leftIndex = 0;
/* for ECMA 3, the first argument is to be treated as a string
(i.e. converted to one if necessary) UNLESS it's a reg.exp object */
/*
* For ECMA 3, the first argument is to be treated as a string
* (i.e. converted to one if necessary) UNLESS it's a reg.exp object.
*/
if (!match_or_replace(cx, obj, argc, argv, replace_glob, &rdata.base, rval,
(cx->version == JSVERSION_DEFAULT || cx->version > JSVERSION_1_4)))
return JS_FALSE;
@ -2643,7 +2671,7 @@ const uint8 js_Y[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
0, 1, 1, 1, 1, 1, 0, 0, /* 0 */
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
0, 0, 0, 0, 1, 1, 1, 1, /* 0 */
0, 0, 0, 0, 0, 0, 0, 0, /* 0 */
2, 3, 3, 3, 4, 3, 3, 3, /* 0 */
5, 6, 3, 7, 3, 8, 3, 3, /* 0 */
9, 9, 9, 9, 9, 9, 9, 9, /* 0 */
@ -2660,7 +2688,7 @@ const uint8 js_Y[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
0, 0, 0, 0, 0, 0, 0, 0, /* 2 */
14, 3, 4, 4, 4, 4, 15, 15, /* 2 */
2, 3, 4, 4, 4, 4, 15, 15, /* 2 */
11, 15, 16, 5, 7, 8, 15, 11, /* 2 */
15, 7, 17, 17, 11, 16, 15, 3, /* 2 */
11, 18, 16, 6, 19, 19, 19, 3, /* 2 */
@ -3762,3 +3790,268 @@ js_ToLower(jschar c)
return (v & 0x00200000) ? c + ((int32)v >> 22) : c;
}
#endif /* !__GNUC__ */
/*
 * Character sets for the URI encode/decode routines in this file, kept as
 * NUL-terminated C strings and converted to JSStrings by the callers.
 */
/* The URI reserved characters plus the pound sign. */
char *uriReservedPlusPound = ";/?:@&=+$,#";
/* Characters left unescaped by encoding: digits, ASCII letters and marks. */
char *uriUnescaped = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.!~*'()";
/*
 * Concatenate `length` jschars from `chars` onto the JSString `str`.
 *
 * The buffer is grown with JS_realloc to hold the old contents, the new
 * characters and one extra slot; that slot is always set to 0 so the result
 * is terminated in the same way add_bytes leaves it.
 *
 * Returns JS_TRUE on success. Returns JS_FALSE if the allocation fails, in
 * which case str->chars and str->length are left as they were (this assumes
 * JS_realloc, like realloc, does not free the old block on failure).
 */
static JSBool
add_chars(JSContext *cx, JSString *str, jschar *chars, size_t length)
{
    jschar *grown;

    /* Use a temporary so a failed realloc cannot clobber str->chars. */
    grown = JS_realloc(cx, str->chars,
                       (str->length + length + 1) * sizeof(jschar));
    if (!grown)
        return JS_FALSE;
    str->chars = grown;
    js_strncpy(str->chars + str->length, chars, length);
    str->length += length;
    /* Fill the reserved extra slot with the terminator. */
    str->chars[str->length] = 0;
    return JS_TRUE;
}
/*
 * Concatenate `length` bytes from `bytes` onto the JSString `str`, widening
 * each byte (treated as unsigned) to a jschar.
 *
 * The buffer is grown with JS_realloc to hold the old contents, the new
 * characters and a terminating 0, which is written after the new data.
 *
 * Returns JS_TRUE on success. Returns JS_FALSE if the allocation fails, in
 * which case str->chars and str->length are left as they were (this assumes
 * JS_realloc, like realloc, does not free the old block on failure).
 */
static JSBool
add_bytes(JSContext *cx, JSString *str, char *bytes, size_t length)
{
    size_t i;
    jschar *grown;

    /* Use a temporary so a failed realloc cannot clobber str->chars. */
    grown = JS_realloc(cx, str->chars,
                       (str->length + length + 1) * sizeof(jschar));
    if (!grown)
        return JS_FALSE;
    str->chars = grown;
    for (i = 0; i < length; i++)
        str->chars[str->length + i] = (unsigned char)bytes[i];
    str->chars[str->length + length] = 0;
    str->length += length;
    return JS_TRUE;
}
/*
 * ECMA 3, 15.1.3 URI Handling Function Properties
 *
 * encode() and decode() implement the hidden 'Encode' and 'Decode'
 * algorithms given in the ECMA specification.
 *
 * encode: walk `str` one UTF-16 unit at a time. Units found in
 * `unescapedSet` are copied through unchanged. Any other unit (or surrogate
 * pair) is converted to a UCS-4 value, serialized as UTF-8, and each UTF-8
 * byte is appended as a "%XX" escape. A lone or mismatched surrogate
 * reports JSMSG_BAD_URI. On success the new string is stored in *rval.
 */
static JSBool encode(JSContext *cx, JSString *str, JSString *unescapedSet, jsval *rval)
{
    JSString *result;
    size_t pos, nbytes, b;
    jschar unit, trail;
    uint32 codePoint;
    unsigned char utf8buf[6];
    char hexBuf[4];

    result = js_NewString(cx, NULL, 0, 0);
    if (!result)
        return JS_FALSE;

    for (pos = 0; pos < str->length; pos++) {
        unit = str->chars[pos];

        /* Characters in the unescaped set pass straight through. */
        if (js_strchr(unescapedSet->chars, unit)) {
            if (!add_chars(cx, result, &unit, 1))
                return JS_FALSE;
            continue;
        }

        /* A low surrogate with no preceding high surrogate is malformed. */
        if (unit >= 0xDC00 && unit <= 0xDFFF)
            goto bad_uri;

        if (unit >= 0xD800 && unit <= 0xDBFF) {
            /* High surrogate: it must be followed by a low surrogate. */
            pos++;
            if (pos == str->length)
                goto bad_uri;
            trail = str->chars[pos];
            if (trail < 0xDC00 || trail > 0xDFFF)
                goto bad_uri;
            codePoint = ((unit - 0xD800) << 10) + (trail - 0xDC00) + 0x10000;
        } else {
            codePoint = unit;
        }

        /* Emit one %XX escape per UTF-8 byte of the code point. */
        nbytes = oneUcs4ToUtf8Char(utf8buf, codePoint);
        for (b = 0; b < nbytes; b++) {
            sprintf(hexBuf, "%%%.2X", utf8buf[b]);
            if (!add_bytes(cx, result, hexBuf, 3))
                return JS_FALSE;
        }
    }

    *rval = STRING_TO_JSVAL(result);
    return JS_TRUE;

bad_uri:
    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                         JSMSG_BAD_URI, NULL);
    return JS_FALSE;
}
/*
 * Implementation of the ECMA 'Decode' algorithm: copy `str` into a new
 * string, replacing each "%XX" escape (or run of escapes forming one UTF-8
 * sequence) with the character it encodes. If the decoded character is in
 * `reservedSet`, the original escape text is copied through instead.
 *
 * Decoded values of 0x10000 and above are appended as a surrogate pair.
 * A truncated escape, a non-hex digit, an invalid UTF-8 lead or
 * continuation byte, or a value beyond the surrogate-pair range reports
 * JSMSG_BAD_URI and returns JS_FALSE. On success the new string is stored
 * in *rval and JS_TRUE is returned.
 */
static JSBool decode(JSContext *cx, JSString *str, JSString *reservedSet, jsval *rval)
{
    size_t start, k = 0;
    jschar C, H;
    uint32 V;
    uintN B;    /* was the non-standard 'uint'; uintN is this file's type */
    unsigned char octets[6];
    JSString *R;
    int16 j, n;

    R = js_NewString(cx, NULL, 0, 0);
    if (!R)
        return JS_FALSE;
    while (k < str->length) {
        C = str->chars[k];
        if (C == '%') {
            start = k;
            /* Need two hex digits after the '%'. */
            if ((k + 2) >= str->length)
                goto errOut;
            if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2]))
                goto errOut;
            B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
            k += 2;     /* k now indexes the last digit of this escape */
            if (!(B & 0x80)) {
                /* Single-byte (7-bit) character. */
                C = B;
            } else {
                /* Count the leading 1 bits to get the sequence length n. */
                n = 1;
                while (B & (0x80 >> n))
                    n++;
                /* n == 1 is a stray continuation byte; more than 6 is invalid. */
                if ((n == 1) || (n > 6))
                    goto errOut;
                octets[0] = (char)B;
                /* The remaining n-1 escapes take three characters each. */
                if ((k + 3 * (n - 1)) >= str->length)
                    goto errOut;
                for (j = 1; j < n; j++) {
                    k++;
                    if (str->chars[k] != '%')
                        goto errOut;
                    if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2]))
                        goto errOut;
                    B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
                    /* Continuation bytes must look like 10xxxxxx. */
                    if ((B & 0xC0) != 0x80)
                        goto errOut;
                    k += 2;
                    octets[j] = (char)B;
                }
                V = utf8ToOneUcs4Char(octets, n);
                if (V >= 0x10000) {
                    /* Split into a surrogate pair; append the high half now. */
                    V -= 0x10000;
                    if (V > 0xFFFFF)
                        goto errOut;
                    C = (jschar)((V & 0x3FF) + 0xDC00);
                    H = (jschar)((V >> 10) + 0xD800);
                    if (!add_chars(cx, R, &H, 1))
                        return JS_FALSE;
                } else {
                    C = (jschar)V;
                }
            }
            if (js_strchr(reservedSet->chars, C)) {
                /* Reserved character: keep the escape text as written. */
                if (!add_chars(cx, R, &str->chars[start], (k - start + 1)))
                    return JS_FALSE;
            } else {
                if (!add_chars(cx, R, &C, 1))
                    return JS_FALSE;
            }
        } else {
            /* Not an escape: copy the character through. */
            if (!add_chars(cx, R, &C, 1))
                return JS_FALSE;
        }
        k++;
    }
    *rval = STRING_TO_JSVAL(R);
    return JS_TRUE;
errOut:
    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
                         JSMSG_BAD_URI, NULL);
    return JS_FALSE;
}
/*
 * Native behind the global decodeURI function: converts argv[0] to a string
 * and decodes it, leaving escapes of the reserved characters (including
 * '#') in their escaped form.
 */
static JSBool
str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    JSString *input;
    JSString *reserved;

    input = js_ValueToString(cx, argv[0]);
    if (input == NULL)
        return JS_FALSE;

    reserved = JS_NewStringCopyZ(cx, uriReservedPlusPound);
    if (reserved == NULL)
        return JS_FALSE;

    return decode(cx, input, reserved, rval);
}
/*
 * Native behind the global decodeURIComponent function: converts argv[0]
 * to a string and decodes it with an empty reserved set, so every escape is
 * decoded.
 */
static JSBool
str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    JSString *input = js_ValueToString(cx, argv[0]);

    if (input == NULL)
        return JS_FALSE;
    return decode(cx, input, cx->runtime->emptyString, rval);
}
/*
 * Native behind the global encodeURI function: converts argv[0] to a string
 * and encodes it, leaving both the reserved characters (including '#') and
 * the unescaped characters as they are.
 */
static JSBool
str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    JSString *input;
    JSString *passThrough;

    input = js_ValueToString(cx, argv[0]);
    if (input == NULL)
        return JS_FALSE;

    /* Build the pass-through set: reserved characters, then unescaped ones. */
    passThrough = JS_NewStringCopyZ(cx, uriReservedPlusPound);
    if (passThrough == NULL)
        return JS_FALSE;
    if (!add_bytes(cx, passThrough, uriUnescaped, strlen(uriUnescaped)))
        return JS_FALSE;

    return encode(cx, input, passThrough, rval);
}
/*
 * Native behind the global encodeURIComponent function: converts argv[0] to
 * a string and encodes it, leaving only the unescaped characters as they
 * are.
 */
static JSBool
str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
    JSString *input;
    JSString *passThrough;

    input = js_ValueToString(cx, argv[0]);
    if (input == NULL)
        return JS_FALSE;

    passThrough = JS_NewStringCopyZ(cx, uriUnescaped);
    if (passThrough == NULL)
        return JS_FALSE;

    return encode(cx, input, passThrough, rval);
}
/*
 * Serialize a single UCS-4 character as UTF-8 into utf8Buffer, which must
 * have room for at least 6 bytes. Returns how many bytes were written.
 */
static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char)
{
    int nbytes, pos;
    uint32 rest;

    JS_ASSERT(ucs4Char <= 0x7FFFFFFF);

    /* Values below 0x80 are stored as a single byte. */
    if (ucs4Char < 0x80) {
        utf8Buffer[0] = (unsigned char)ucs4Char;
        return 1;
    }

    /* Two bytes cover 11 bits; each further byte adds 5 more. */
    nbytes = 2;
    for (rest = ucs4Char >> 11; rest != 0; rest >>= 5)
        nbytes++;

    /* Fill the continuation bytes from last to first, 6 bits apiece. */
    for (pos = nbytes - 1; pos > 0; pos--) {
        utf8Buffer[pos] = (unsigned char)((ucs4Char & 0x3F) | 0x80);
        ucs4Char >>= 6;
    }

    /* Lead byte: nbytes high 1 bits followed by the remaining value bits. */
    utf8Buffer[0] = (unsigned char)(0x100 - (1 << (8 - nbytes)) + ucs4Char);
    return nbytes;
}
/*
 * Decode one UTF-8 sequence of utf8Length bytes (1 to 6) starting at
 * utf8Buffer and return the UCS-4 character it represents. The caller is
 * expected to have validated the sequence already; the checks here are
 * assertions only.
 */
static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length)
{
    const unsigned char *p = utf8Buffer;
    uint32 result;
    int remaining;

    JS_ASSERT(utf8Length >= 1 && utf8Length <= 6);

    if (utf8Length == 1) {
        result = *p;
        JS_ASSERT(!(result & 0x80));
        return result;
    }

    JS_ASSERT((*p & (0x100 - (1 << (7 - utf8Length)))) == (0x100 - (1 << (8 - utf8Length))));

    /* Take the value bits from the lead byte. */
    result = *p++ & ((1 << (7 - utf8Length)) - 1);

    /* Each continuation byte supplies six more bits. */
    for (remaining = utf8Length - 1; remaining > 0; remaining--) {
        JS_ASSERT((*p & 0xC0) == 0x80);
        result = (result << 6) | (*p++ & 0x3F);
    }
    return result;
}

Просмотреть файл

@ -122,11 +122,39 @@ typedef enum JSCharType {
(1 << JSCT_DECIMAL_DIGIT_NUMBER)) \
>> JS_CTYPE(c)) & 1)
/* A unicode letter, suitable for use in an identifier. */
#define JS_ISUC_LETTER(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER) | \
(1 << JSCT_LETTER_NUMBER)) \
>> JS_CTYPE(c)) & 1)
/*
* 'IdentifierPart' from ECMA grammar, is Unicode letter or
* combining mark or digit or connector punctuation.
*/
#define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) | \
(1 << JSCT_LOWERCASE_LETTER) | \
(1 << JSCT_TITLECASE_LETTER) | \
(1 << JSCT_MODIFIER_LETTER) | \
(1 << JSCT_OTHER_LETTER) | \
(1 << JSCT_LETTER_NUMBER) | \
(1 << JSCT_NON_SPACING_MARK) | \
(1 << JSCT_COMBINING_SPACING_MARK) | \
(1 << JSCT_DECIMAL_DIGIT_NUMBER) | \
(1 << JSCT_CONNECTOR_PUNCTUATION)) \
>> JS_CTYPE(c)) & 1)
/* Unicode control-format characters, ignored in input */
#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
#define JS_ISWORD(c) (JS_ISALNUM(c) || (c) == '_')
/* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
#define JS_ISIDENT(c) ((c) < 128 && (isalpha(c) || (c) == '_' || (c) == '$'))
#define JS_ISIDENT2(c) ((c) < 128 && (isalnum(c) || (c) == '_' || (c) == '$'))
#define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
#define JS_ISIDENT(c) (JS_ISID_PART(c) || (c) == '_' || (c) == '$')
#define JS_ISDIGIT(c) (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)