r=norris,waldemar

Fixes for bugs #23607, #23608, #23610, #23612, and #23613. Also a first cut at the URI encode and decode routines.
2000-01-26 22:15:06 +00:00 · 2000-01-26 22:15:06 +00:00 · f3a0bbab33
--- a/js/src/js.msg
+++ b/js/src/js.msg
@ -231,3 +231,4 @@ MSG_DEF(JSMSG_REDECLARED_VAR,         155, 2, JSEXN_TYPEERR, "redeclaration of {
 MSG_DEF(JSMSG_UNDECLARED_VAR,         156, 1, JSEXN_TYPEERR, "assignment to undeclared variable {0}")
 MSG_DEF(JSMSG_ANON_NO_RETURN_VALUE,   157, 0, JSEXN_TYPEERR, "anonymous function does not always return a value")
 MSG_DEF(JSMSG_DEPRECATED_USAGE,       158, 1, JSEXN_REFERENCEERR, "deprecated {0} usage")
+MSG_DEF(JSMSG_BAD_URI,				  159, 0, JSEXN_URIERR, "malformed URI sequence")
--- a/js/src/jsfun.c
+++ b/js/src/jsfun.c
@ -1333,11 +1333,16 @@ js_IsIdentifier(JSString *str)
    n = str->length;
    s = str->chars;
    c = *s;
-    if (n == 0 || !JS_ISIDENT(c))
+    /*
+    * We don't handle unicode escape sequences here
+    * because they won't be in the input string.
+    * (Right?)
+    */
+    if (n == 0 || !JS_ISIDENT_START(c))
 	return JS_FALSE;
    for (n--; n != 0; n--) {
 	c = *++s;
-	if (!JS_ISIDENT2(c))
+	if (!JS_ISIDENT(c))
 	    return JS_FALSE;
    }
    return JS_TRUE;
--- a/js/src/jsscan.c
+++ b/js/src/jsscan.c
@ -276,126 +276,128 @@ GetChar(JSTokenStream *ts)
    if (ts->ungetpos != 0) {
 	c = ts->ungetbuf[--ts->ungetpos];
    } else {
-	if (ts->linebuf.ptr == ts->linebuf.limit) {
-	    len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
-	    if (len <= 0) {
-		/* Fill ts->userbuf so that \r and \r\n convert to \n. */
-		if (ts->file) {
-		    JSBool crflag;
-		    char cbuf[JS_LINE_LIMIT];
-		    jschar *ubuf;
-		    ptrdiff_t i, j;
+        do {
+	    if (ts->linebuf.ptr == ts->linebuf.limit) {
+	        len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
+	        if (len <= 0) {
+		    /* Fill ts->userbuf so that \r and \r\n convert to \n. */
+		    if (ts->file) {
+		        JSBool crflag;
+		        char cbuf[JS_LINE_LIMIT];
+		        jschar *ubuf;
+		        ptrdiff_t i, j;

-		    crflag = (ts->flags & TSF_CRFLAG) != 0;
-		    if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
-			ts->flags |= TSF_EOF;
-			return EOF;
+		        crflag = (ts->flags & TSF_CRFLAG) != 0;
+		        if (!fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file)) {
+			    ts->flags |= TSF_EOF;
+			    return EOF;
+		        }
+		        len = olen = strlen(cbuf);
+		        JS_ASSERT(len > 0);
+		        ubuf = ts->userbuf.base;
+		        i = 0;
+		        if (crflag) {
+			    ts->flags &= ~TSF_CRFLAG;
+			    if (cbuf[0] != '\n') {
+			        ubuf[i++] = '\n';
+			        len++;
+			        ts->linepos--;
+			    }
+		        }
+		        for (j = 0; i < len; i++, j++)
+			    ubuf[i] = (jschar) (unsigned char) cbuf[j];
+		        ts->userbuf.limit = ubuf + len;
+		        ts->userbuf.ptr = ubuf;
+		    } else {
+		        ts->flags |= TSF_EOF;
+		        return EOF;
 		    }
-		    len = olen = strlen(cbuf);
-		    JS_ASSERT(len > 0);
-		    ubuf = ts->userbuf.base;
-		    i = 0;
-		    if (crflag) {
-			ts->flags &= ~TSF_CRFLAG;
-			if (cbuf[0] != '\n') {
-			    ubuf[i++] = '\n';
-			    len++;
-			    ts->linepos--;
-			}
+	        }
+                if (ts->listener)
+                    (*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
+                                    &ts->listenerTSData, ts->listenerData);
+	        /*
+	         * Any one of \n, \r, or \r\n ends a line (longest match wins).
+	         */
+	        for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
+		    if (*nl == '\n')
+		        break;
+		    if (*nl == '\r') {
+		        if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
+			    nl++;
+		        break;
 		    }
-		    for (j = 0; i < len; i++, j++)
-			ubuf[i] = (jschar) (unsigned char) cbuf[j];
-		    ts->userbuf.limit = ubuf + len;
-		    ts->userbuf.ptr = ubuf;
-		} else {
-		    ts->flags |= TSF_EOF;
-		    return EOF;
-		}
-	    }
-            if (ts->listener)
-                (*ts->listener)(ts->filename, ts->lineno, ts->userbuf.ptr, len,
-                                &ts->listenerTSData, ts->listenerData);
-	    /*
-	     * Any one of \n, \r, or \r\n ends a line (longest match wins).
-	     */
-	    for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
-		if (*nl == '\n')
-		    break;
-		if (*nl == '\r') {
-		    if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
-			nl++;
-		    break;
-		}
-	    }
+	        }

-	    /*
-	     * If there was a line terminator, copy thru it into linebuf.
-	     * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
-	     */
-	    if (nl < ts->userbuf.limit)
-		len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
-	    if (len >= JS_LINE_LIMIT)
-		len = JS_LINE_LIMIT - 1;
-	    js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
-	    ts->userbuf.ptr += len;
-	    olen = len;
+	        /*
+	         * If there was a line terminator, copy thru it into linebuf.
+	         * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
+	         */
+	        if (nl < ts->userbuf.limit)
+		    len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
+	        if (len >= JS_LINE_LIMIT)
+		    len = JS_LINE_LIMIT - 1;
+	        js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
+	        ts->userbuf.ptr += len;
+	        olen = len;

-	    /*
-	     * Make sure linebuf contains \n for EOL (don't do this in
-	     * userbuf because the user's string might be readonly).
-	     */
-	    if (nl < ts->userbuf.limit) {
-		if (*nl == '\r') {
-		    if (ts->linebuf.base[len-1] == '\r') {
-                        /*
-                         * Does the line segment end in \r?  We must check for
-                         * a \n at the front of the next segment before storing
-                         * a \n into linebuf.  This case only matters when we're
-                         * reading from a file.
-                         */
-			if (nl + 1 == ts->userbuf.limit && ts->file) {
+	        /*
+	         * Make sure linebuf contains \n for EOL (don't do this in
+	         * userbuf because the user's string might be readonly).
+	         */
+	        if (nl < ts->userbuf.limit) {
+		    if (*nl == '\r') {
+		        if (ts->linebuf.base[len-1] == '\r') {
+                            /*
+                             * Does the line segment end in \r?  We must check for
+                             * a \n at the front of the next segment before storing
+                             * a \n into linebuf.  This case only matters when we're
+                             * reading from a file.
+                             */
+			    if (nl + 1 == ts->userbuf.limit && ts->file) {
+			        len--;
+			        ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
+                                if (len == 0) {
+                                    /*
+                                     * This can happen when a segment ends in \r\r.
+                                     * Start over.  ptr == limit in this case, so
+                                     * we'll fall into buffer-filling code.
+                                     */
+                                    return GetChar(ts);
+                                }
+			    } else
+                                ts->linebuf.base[len-1] = '\n';
+		        }
+		    } else if (*nl == '\n') {
+		        if (nl > ts->userbuf.base &&
+			    nl[-1] == '\r' &&
+			    ts->linebuf.base[len-2] == '\r') {
 			    len--;
-			    ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
-                            if (len == 0) {
-                                /*
-                                 * This can happen when a segment ends in \r\r.
-                                 * Start over.  ptr == limit in this case, so
-                                 * we'll fall into buffer-filling code.
-                                 */
-                                return GetChar(ts);
-                            }
-			} else
-                            ts->linebuf.base[len-1] = '\n';
+			    JS_ASSERT(ts->linebuf.base[len] == '\n');
+			    ts->linebuf.base[len-1] = '\n';
+		        }
 		    }
-		} else if (*nl == '\n') {
-		    if (nl > ts->userbuf.base &&
-			nl[-1] == '\r' &&
-			ts->linebuf.base[len-2] == '\r') {
-			len--;
-			JS_ASSERT(ts->linebuf.base[len] == '\n');
-			ts->linebuf.base[len-1] = '\n';
-		    }
-		}
+	        }
+
+	        /* Reset linebuf based on adjusted segment length. */
+	        ts->linebuf.limit = ts->linebuf.base + len;
+	        ts->linebuf.ptr = ts->linebuf.base;
+
+	        /* Update position of linebuf within physical line in userbuf. */
+	        if (!(ts->flags & TSF_NLFLAG))
+		    ts->linepos += ts->linelen;
+	        else
+		    ts->linepos = 0;
+	        if (ts->linebuf.limit[-1] == '\n')
+		    ts->flags |= TSF_NLFLAG;
+	        else
+		    ts->flags &= ~TSF_NLFLAG;
+
+	        /* Update linelen from original segment length. */
+	        ts->linelen = olen;
 	    }
-
-	    /* Reset linebuf based on adjusted segment length. */
-	    ts->linebuf.limit = ts->linebuf.base + len;
-	    ts->linebuf.ptr = ts->linebuf.base;
-
-	    /* Update position of linebuf within physical line in userbuf. */
-	    if (!(ts->flags & TSF_NLFLAG))
-		ts->linepos += ts->linelen;
-	    else
-		ts->linepos = 0;
-	    if (ts->linebuf.limit[-1] == '\n')
-		ts->flags |= TSF_NLFLAG;
-	    else
-		ts->flags &= ~TSF_NLFLAG;
-
-	    /* Update linelen from original segment length. */
-	    ts->linelen = olen;
-	}
-	c = *ts->linebuf.ptr++;
+	    c = *ts->linebuf.ptr++;
+        } while (JS_ISFORMAT(c));
    }
    if (c == '\n')
 	ts->lineno++;
@ -642,6 +644,30 @@ AddToTokenBuf(JSContext *cx, JSTokenBuf *tb, jschar c)
    return JS_TRUE;
 }

+/*
+ * The scanner has just read a '\'.  Look at the next five characters: if
+ * they are 'u' followed by four hexadecimal digits, skip them and return
+ * the 16-bit value the digits spell.  Otherwise skip nothing and return
+ * the '\' itself so the caller can deal with it.
+ * (PeekChars presumably does not consume input -- confirm against its
+ * definition.)
+ */
+static int32
+getUnicodeEscape(JSTokenStream *ts)
+{
+    jschar look[5];
+    int32 value;
+    int i;
+
+    if (!PeekChars(ts, 5, look) || look[0] != 'u')
+        return '\\';
+
+    /* Accumulate the four hex digits, most significant first. */
+    value = 0;
+    for (i = 1; i < 5; i++) {
+        if (!JS7_ISHEX(look[i]))
+            return '\\';
+        value = (value << 4) + JS7_UNHEX(look[i]);
+    }
+
+    SkipChars(ts, 5);
+    return value;
+}
+
 JSTokenType
 js_GetToken(JSContext *cx, JSTokenStream *ts)
 {
@ -649,6 +675,7 @@ js_GetToken(JSContext *cx, JSTokenStream *ts)
    JSToken *tp;
    int32 c;
    JSAtom *atom;
+    JSBool hadUnicodeEscape;

 #define INIT_TOKENBUF(tb)   ((tb)->ptr = (tb)->base)
 #define FINISH_TOKENBUF(tb) if (!AddToTokenBuf(cx, tb, 0)) RETURN(TOK_ERROR)
@ -688,13 +715,26 @@ retry:
    if (c == EOF)
 	RETURN(TOK_EOF);

-    if (JS_ISIDENT(c)) {
+    hadUnicodeEscape = JS_FALSE;
+    if (JS_ISIDENT_START(c) 
+                || ((c == '\\') 
+                        && (c = getUnicodeEscape(ts), 
+                            hadUnicodeEscape = JS_ISIDENT_START(c)))) {
 	INIT_TOKENBUF(&ts->tokenbuf);
-	do {
+        for (;;) {
 	    if (!AddToTokenBuf(cx, &ts->tokenbuf, (jschar)c))
 		RETURN(TOK_ERROR);
 	    c = GetChar(ts);
-	} while (JS_ISIDENT2(c));
+            if (c == '\\') {
+                c = getUnicodeEscape(ts);
+                if (JS_ISIDENT(c))
+                    hadUnicodeEscape = JS_TRUE;
+                else
+                    break;
+            }
+            else
+                if (!JS_ISIDENT(c)) break;
+        }
 	UngetChar(ts, c);
 	FINISH_TOKENBUF(&ts->tokenbuf);

@ -704,13 +744,16 @@ retry:
 			       0);
 	if (!atom)
 	    RETURN(TOK_ERROR);
-	if (atom->kwindex >= 0) {
-	    struct keyword *kw;
+        if (hadUnicodeEscape) /* Can never be a keyword, then. */
+            atom->kwindex = -1;
+        else
+	    if (atom->kwindex >= 0) {
+	        struct keyword *kw;

-	    kw = &keywords[atom->kwindex];
-	    tp->t_op = (JSOp) kw->op;
-	    RETURN(kw->tokentype);
-	}
+	        kw = &keywords[atom->kwindex];
+	        tp->t_op = (JSOp) kw->op;
+	        RETURN(kw->tokentype);
+	    }
 	tp->t_op = JSOP_NAME;
 	tp->t_atom = atom;
 	RETURN(TOK_NAME);
--- a/js/src/jsscan.h
+++ b/js/src/jsscan.h
@ -153,7 +153,7 @@ struct JSTokenStream {
    uintN               lookahead;      /* count of lookahead tokens */
    uintN               lineno;         /* current line number */
    uintN               ungetpos;       /* next free char slot in ungetbuf */
-    jschar              ungetbuf[4];    /* at most 4, for \uXXXX lookahead */
+    jschar              ungetbuf[5];    /* at most 5, for \uXXXX lookahead */
    uintN               flags;          /* flags -- see below */
    ptrdiff_t           linelen;        /* physical linebuf segment length */
    ptrdiff_t           linepos;        /* linebuf offset in physical line */
--- a/js/src/jsstr.c
+++ b/js/src/jsstr.c
@ -67,6 +67,20 @@
 #include "jsinterp.h"
 #endif

+/*
+* Forward declarations for URI encode/decode and helper routines.
+*
+* The four str_* natives are referenced by the string_functions table
+* before their definitions, which live near the end of this file; the two
+* UTF-8 helpers are likewise used by encode/decode before being defined.
+*/
+static JSBool 
+str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
+static JSBool
+str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
+static JSBool
+str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
+static JSBool
+str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval);
+/* Writes ucs4Char as UTF-8 into utf8Buffer; returns the byte count. */
+static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char);
+/* Reads utf8Length bytes of UTF-8 from utf8Buffer; returns the UCS-4 value. */
+static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length);
+
 /* Contributions from the String class to the set of methods defined for the
 * global object.  escape and unescape used to be defined in the Mocha library,
 * but as ECMA decided to spec them, they've been moved to the core engine
@ -260,6 +274,11 @@ static JSFunctionSpec string_functions[] = {
 #if JS_HAS_UNEVAL
    {"uneval",              str_uneval,                 1,0,0},
 #endif
+    {"decodeURI",           str_decodeURI,              1,0,0},
+    {"encodeURI",           str_encodeURI,              1,0,0},
+    {"decodeURIComponent",  str_decodeURI_Component,    1,0,0},
+    {"encodeURIComponent",  str_encodeURI_Component,    1,0,0},
+
    {0,0,0,0,0}
 };

@ -865,7 +884,7 @@ match_or_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
 	ok = JS_TRUE;
 	re->lastIndex = 0;
 	for (count = 0; index <= str->length; count++) {
-	    ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);
+	    ok = js_ExecuteRegExp(cx, re, str, &index, JS_TRUE, rval);            
 	    if (!ok || *rval != JSVAL_TRUE)
 		break;
 	    ok = glob(cx, count, data);
@ -1066,6 +1085,12 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)
 	void *mark;
 	JSStackFrame *fp;
 	JSBool ok;
+        /*
+         * Save the rightContext from the current regexp, since it
+         * gets stuck at the end of the replacement string and may
+         * be clobbered by a RegExp usage in the lambda function.
+         */
+        JSSubString saveRightContext = cx->regExpStatics.rightContext;

 	/*
 	 * In the lambda case, not only do we find the replacement string's
@ -1143,6 +1168,7 @@ find_replen(JSContext *cx, ReplaceData *rdata, size_t *sizep)

      lambda_out:
 	js_FreeStack(cx, mark);
+        cx->regExpStatics.rightContext = saveRightContext;
 	return ok;
    }
 #endif /* JS_HAS_REPLACE_LAMBDA */
@ -1264,8 +1290,10 @@ str_replace(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
    rdata.length = 0;
    rdata.index = 0;
    rdata.leftIndex = 0;
-    /* for ECMA 3, the first argument is to be treated as a string 
-       (i.e. converted to one if necessary) UNLESS it's a reg.exp object */
+    /* 
+     * For ECMA 3, the first argument is to be treated as a string 
+     * (i.e. converted to one if necessary) UNLESS it's a reg.exp object.
+     */
    if (!match_or_replace(cx, obj, argc, argv, replace_glob, &rdata.base, rval,
                (cx->version == JSVERSION_DEFAULT || cx->version > JSVERSION_1_4)))
 	return JS_FALSE;
@ -2643,7 +2671,7 @@ const uint8 js_Y[] = {
  0,   0,   0,   0,   0,   0,   0,   0,  /*    0 */
  0,   1,   1,   1,   1,   1,   0,   0,  /*    0 */
  0,   0,   0,   0,   0,   0,   0,   0,  /*    0 */
-  0,   0,   0,   0,   1,   1,   1,   1,  /*    0 */
+  0,   0,   0,   0,   0,   0,   0,   0,  /*    0 */
  2,   3,   3,   3,   4,   3,   3,   3,  /*    0 */
  5,   6,   3,   7,   3,   8,   3,   3,  /*    0 */
  9,   9,   9,   9,   9,   9,   9,   9,  /*    0 */
@ -2660,7 +2688,7 @@ const uint8 js_Y[] = {
  0,   0,   0,   0,   0,   0,   0,   0,  /*    2 */
  0,   0,   0,   0,   0,   0,   0,   0,  /*    2 */
  0,   0,   0,   0,   0,   0,   0,   0,  /*    2 */
- 14,   3,   4,   4,   4,   4,  15,  15,  /*    2 */
+  2,   3,   4,   4,   4,   4,  15,  15,  /*    2 */
 11,  15,  16,   5,   7,   8,  15,  11,  /*    2 */
 15,   7,  17,  17,  11,  16,  15,   3,  /*    2 */
 11,  18,  16,   6,  19,  19,  19,   3,  /*    2 */
@ -3762,3 +3790,268 @@ js_ToLower(jschar c)
    return (v & 0x00200000) ? c + ((int32)v >> 22) : c;
 }
 #endif /* !__GNUC__ */
+
+/*
+ * Character sets consulted by the URI natives in this file.
+ *
+ * uriReservedPlusPound: the characters decodeURI leaves escaped, and (together
+ * with uriUnescaped) the characters encodeURI passes through unchanged.
+ * uriUnescaped: digits, ASCII letters and the marks -_.!~*'() ; these are the
+ * only characters encodeURIComponent passes through unchanged.
+ *
+ * NOTE(review): both are writable, externally visible globals.  If no other
+ * file refers to them they could become static const -- confirm before
+ * changing, since add_bytes currently takes a non-const char *.
+ */
+char *uriReservedPlusPound = ";/?:@&=+$,#";
+char *uriUnescaped = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.!~*'()";
+
+/*
+ * Append `length` jschars starting at `chars` to the end of str.
+ *
+ * str->chars is grown with JS_realloc to hold the old contents, the new
+ * characters and one terminating 0, and str->length is advanced by `length`.
+ * The result is always 0-terminated, matching add_bytes, so the buffer can
+ * be handed to routines such as js_strchr that scan for a terminator.
+ *
+ * Returns JS_TRUE on success.  If the reallocation fails, returns JS_FALSE
+ * and leaves str->chars and str->length exactly as they were; the previous
+ * buffer stays owned by str instead of being dropped.
+ */
+static JSBool
+add_chars(JSContext *cx, JSString *str, jschar *chars, size_t length)
+{
+    jschar *newchars;
+
+    /* Keep the old pointer until we know the reallocation succeeded. */
+    newchars = JS_realloc(cx, str->chars,
+                          (str->length + length + 1) * sizeof(jschar));
+    if (!newchars)
+        return JS_FALSE;
+    str->chars = newchars;
+
+    js_strncpy(str->chars + str->length, chars, length);
+    str->length += length;
+    /* The "+ 1" slot reserved above holds the terminator. */
+    str->chars[str->length] = 0;
+    return JS_TRUE;
+}
+
+/*
+ * Append `length` bytes starting at `bytes` to the end of str, widening each
+ * byte (taken as unsigned char) to one jschar.
+ *
+ * str->chars is grown with JS_realloc to hold the old contents, the new
+ * characters and one terminating 0, and str->length is advanced by `length`.
+ *
+ * Returns JS_TRUE on success.  If the reallocation fails, returns JS_FALSE
+ * and leaves str->chars and str->length exactly as they were; the previous
+ * buffer stays owned by str instead of being dropped.
+ */
+static JSBool
+add_bytes(JSContext *cx, JSString *str, char *bytes, size_t length)
+{
+    jschar *newchars;
+    size_t i;
+
+    /* Keep the old pointer until we know the reallocation succeeded. */
+    newchars = JS_realloc(cx, str->chars,
+                          (str->length + length + 1) * sizeof(jschar));
+    if (!newchars)
+        return JS_FALSE;
+    str->chars = newchars;
+
+    /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
+    for (i = 0; i < length; i++)
+        str->chars[str->length + i] = (unsigned char)bytes[i];
+    str->length += length;
+    str->chars[str->length] = 0;
+    return JS_TRUE;
+}
+
+/*
+ * ECMA 3, 15.1.3 URI Handling Function Properties.
+ *
+ * encode() and decode() implement the hidden 'Encode' and 'Decode'
+ * algorithms of that section.
+ *
+ * encode: copy every character of str that occurs in unescapedSet straight
+ * to the result; convert every other character (combining a surrogate pair
+ * into one code point first) to UTF-8 and emit each octet as "%XX".  An
+ * unpaired surrogate is reported as JSMSG_BAD_URI.  On success the new
+ * string is stored in *rval and JS_TRUE is returned; on any failure the
+ * function returns JS_FALSE.
+ */
+static JSBool encode(JSContext *cx, JSString *str, JSString *unescapedSet, jsval *rval)
+{
+    size_t pos, i, nbytes;
+    jschar unit, trail;
+    uint32 codePoint;
+    unsigned char utf8buf[6];
+    char hexBuf[4];
+    JSString *result;
+
+    result = js_NewString(cx, NULL, 0, 0);
+    if (!result)
+        return JS_FALSE;
+
+    for (pos = 0; pos < str->length; pos++) {
+        unit = str->chars[pos];
+
+        /* Characters in the unescaped set pass through untouched. */
+        if (js_strchr(unescapedSet->chars, unit)) {
+            if (!add_chars(cx, result, &unit, 1))
+                return JS_FALSE;
+            continue;
+        }
+
+        /* A low surrogate with no preceding high surrogate is malformed. */
+        if (unit >= 0xDC00 && unit <= 0xDFFF)
+            goto bad_uri;
+
+        if (unit >= 0xD800 && unit <= 0xDBFF) {
+            /* High surrogate: it must be followed by a low surrogate. */
+            if (++pos == str->length)
+                goto bad_uri;
+            trail = str->chars[pos];
+            if (trail < 0xDC00 || trail > 0xDFFF)
+                goto bad_uri;
+            codePoint = ((unit - 0xD800) << 10) + (trail - 0xDC00) + 0x10000;
+        } else {
+            codePoint = unit;
+        }
+
+        /* Emit the UTF-8 form one octet at a time as %XX. */
+        nbytes = oneUcs4ToUtf8Char(utf8buf, codePoint);
+        for (i = 0; i < nbytes; i++) {
+            sprintf(hexBuf, "%%%.2X", utf8buf[i]);
+            if (!add_bytes(cx, result, hexBuf, 3))
+                return JS_FALSE;
+        }
+    }
+
+    *rval = STRING_TO_JSVAL(result);
+    return JS_TRUE;
+
+bad_uri:
+    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
+                         JSMSG_BAD_URI, NULL);
+    return JS_FALSE;
+}
+
+/*
+ * Implementation of the ECMA 'Decode' algorithm: build a new string from str
+ * in which each %XX escape sequence (or run of escapes forming one UTF-8
+ * character) is replaced by the character it encodes, except that an escape
+ * whose decoded character occurs in reservedSet is copied through verbatim.
+ *
+ * On success the new string is stored in *rval and JS_TRUE is returned.
+ * Malformed input (truncated escape, non-hex digit, bad UTF-8 lead or
+ * continuation octet, value beyond the surrogate-pair range) reports
+ * JSMSG_BAD_URI and returns JS_FALSE.  A failure in js_NewString or
+ * add_chars also returns JS_FALSE, without a URI error being reported here.
+ *
+ * reservedSet->chars is scanned with js_strchr, so it presumably has to be
+ * 0-terminated -- confirm against js_strchr.
+ */
+static JSBool decode(JSContext *cx, JSString *str, JSString *reservedSet, jsval *rval)
+{
+    size_t start, k = 0;
+    jschar C, H;
+    uint32 V;
+    uint B;
+    unsigned char octets[6];
+    JSString *R;
+    int16 j, n;
+
+    R = js_NewString(cx, NULL, 0, 0);
+    if (!R)
+        return JS_FALSE;
+
+    /* k is the index in str of the character currently being examined. */
+    while (k < str->length) {
+        C = str->chars[k];
+        if (C == '%') {
+            /* Remember where the escape began in case it is copied as-is. */
+            start = k;
+            /* Both hex digits (indices k+1 and k+2) must lie inside str. */
+            if ((k + 2) >= str->length) goto errOut;
+            if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2])) 
+                goto errOut;
+            B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
+            /* k now indexes the last hex digit of this escape. */
+            k += 2;
+            if (!(B & 0x80))
+                /* High bit clear: the octet is the character itself. */
+                C = B;
+            else {
+                /*
+                 * Count the leading one-bits of the first octet; that is
+                 * the number of octets in the UTF-8 sequence.  A count of 1
+                 * means a continuation octet in lead position.
+                 */
+                n = 1;
+                while (B & (0x80 >> n)) n++;
+                if ((n == 1) || (n > 6)) goto errOut;
+                octets[0] = (char)B;
+                /* Each remaining octet needs three more characters: %XX. */
+                if ((k + 3 * (n - 1)) >= str->length) goto errOut;
+                for (j = 1; j < n; j++) {
+                    k++;
+                    if (str->chars[k] != '%') goto errOut;
+                    if (!JS7_ISHEX(str->chars[k + 1]) || !JS7_ISHEX(str->chars[k + 2])) 
+                        goto errOut;
+                    B = JS7_UNHEX(str->chars[k + 1]) * 16 + JS7_UNHEX(str->chars[k + 2]);
+                    /* Continuation octets must have the form 10xxxxxx. */
+                    if ((B & 0xC0) != 0x80) goto errOut;
+                    k += 2;
+                    octets[j] = (char)B;
+                }
+                V = utf8ToOneUcs4Char(octets, n);
+                if (V >= 0x10000) {
+                    /*
+                     * Outside the 16-bit range: split into a surrogate
+                     * pair.  The high half H is appended right away; the
+                     * low half is left in C for the common code below.
+                     */
+                    V -= 0x10000;
+                    if (V > 0xFFFFF) goto errOut;
+                    C = (jschar)((V & 0x3FF) + 0xDC00);
+                    H = (jschar)((V >> 10) + 0xD800);
+                    if (!add_chars(cx, R, &H, 1)) return JS_FALSE;
+                }
+                else
+                    C = (jschar)V;
+            }
+            if (js_strchr(reservedSet->chars, C)) {
+                /* Reserved: keep the original escape text str[start..k]. */
+                if (!add_chars(cx, R, &str->chars[start], (k - start + 1))) return JS_FALSE;
+            }
+            else
+                if (!add_chars(cx, R, &C, 1)) return JS_FALSE;
+        }
+        else
+            /* Not an escape: copy the character unchanged. */
+            if (!add_chars(cx, R, &C, 1)) return JS_FALSE;
+        k++;
+    }
+    *rval = STRING_TO_JSVAL(R);
+    return JS_TRUE;
+errOut:
+    /* Shared exit for every kind of malformed escape sequence. */
+    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
+		     JSMSG_BAD_URI, NULL);
+    return JS_FALSE;
+}
+
+/*
+ * Native for the global decodeURI function: convert argv[0] to a string and
+ * decode it, leaving escapes for the characters in uriReservedPlusPound
+ * untouched.  Returns JS_FALSE if any step fails.
+ */
+static JSBool
+str_decodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
+{
+    JSString *input;
+    JSString *keepEscaped;
+
+    input = js_ValueToString(cx, argv[0]);
+    if (input == NULL)
+        return JS_FALSE;
+
+    keepEscaped = JS_NewStringCopyZ(cx, uriReservedPlusPound);
+    if (keepEscaped == NULL)
+        return JS_FALSE;
+
+    return decode(cx, input, keepEscaped, rval);
+}
+
+/*
+ * Native for the global decodeURIComponent function: convert argv[0] to a
+ * string and decode every escape in it (the reserved set is the runtime's
+ * empty string).  Returns JS_FALSE if the conversion or the decode fails.
+ */
+static JSBool
+str_decodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
+{
+    JSString *input = js_ValueToString(cx, argv[0]);
+
+    if (input == NULL)
+        return JS_FALSE;
+    return decode(cx, input, cx->runtime->emptyString, rval);
+}
+
+/*
+ * Native for the global encodeURI function: convert argv[0] to a string and
+ * encode it, passing through unchanged every character that appears in
+ * uriReservedPlusPound or uriUnescaped.  Returns JS_FALSE if any step fails.
+ */
+static JSBool
+str_encodeURI(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
+{
+    JSString *input;
+    JSString *passThrough;
+
+    input = js_ValueToString(cx, argv[0]);
+    if (input == NULL)
+        return JS_FALSE;
+
+    /* Build the pass-through set: reserved characters, then unescaped ones. */
+    passThrough = JS_NewStringCopyZ(cx, uriReservedPlusPound);
+    if (passThrough == NULL ||
+        !add_bytes(cx, passThrough, uriUnescaped, strlen(uriUnescaped))) {
+        return JS_FALSE;
+    }
+
+    return encode(cx, input, passThrough, rval);
+}
+
+/*
+ * Native for the global encodeURIComponent function: convert argv[0] to a
+ * string and encode it, passing through unchanged only the characters in
+ * uriUnescaped.  Returns JS_FALSE if any step fails.
+ */
+static JSBool
+str_encodeURI_Component(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
+{
+    JSString *input;
+    JSString *passThrough;
+
+    input = js_ValueToString(cx, argv[0]);
+    if (input == NULL)
+        return JS_FALSE;
+
+    passThrough = JS_NewStringCopyZ(cx, uriUnescaped);
+    if (passThrough == NULL)
+        return JS_FALSE;
+
+    return encode(cx, input, passThrough, rval);
+}
+
+
+/* Encode a single UCS-4 character as UTF-8 into utf8Buffer, which must have
+ * room for at least 6 bytes.  The value must not exceed 0x7FFFFFFF (asserted).
+ * Returns how many bytes were stored.
+ */
+static int oneUcs4ToUtf8Char(unsigned char *utf8Buffer, uint32 ucs4Char)
+{
+    int nbytes, pos;
+    uint32 rest;
+
+    JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
+
+    /* Values below 0x80 are stored as a single byte. */
+    if (ucs4Char < 0x80) {
+        utf8Buffer[0] = (unsigned char)ucs4Char;
+        return 1;
+    }
+
+    /* Two bytes carry 11 payload bits; every further byte adds five more. */
+    nbytes = 2;
+    for (rest = ucs4Char >> 11; rest != 0; rest >>= 5)
+        nbytes++;
+
+    /* Fill the continuation bytes from the last one back, six bits each. */
+    for (pos = nbytes - 1; pos > 0; pos--) {
+        utf8Buffer[pos] = (unsigned char)((ucs4Char & 0x3F) | 0x80);
+        ucs4Char >>= 6;
+    }
+
+    /* Lead byte: nbytes high one-bits followed by the remaining payload. */
+    utf8Buffer[0] = (unsigned char)(0x100 - (1 << (8 - nbytes)) + ucs4Char);
+    return nbytes;
+}
+
+
+/* Decode one UTF-8 sequence of utf8Length bytes (1 to 6) starting at
+ * utf8Buffer and return the UCS-4 character it encodes.  The caller is
+ * expected to have validated the sequence already; the checks here are
+ * assertions only.
+ */
+static uint32 utf8ToOneUcs4Char(const unsigned char *utf8Buffer, int utf8Length)
+{
+    const unsigned char *p = utf8Buffer;
+    uint32 result;
+    int remaining;
+
+    JS_ASSERT(utf8Length >= 1 && utf8Length <= 6);
+
+    /* A one-byte sequence is the character itself. */
+    if (utf8Length == 1) {
+        result = *p;
+        JS_ASSERT(!(result & 0x80));
+        return result;
+    }
+
+    /* The lead byte must announce exactly utf8Length bytes. */
+    JS_ASSERT((*p & (0x100 - (1 << (7-utf8Length)))) == (0x100 - (1 << (8-utf8Length))));
+
+    /* Payload bits of the lead byte, then six bits per continuation byte. */
+    result = *p++ & ((1 << (7 - utf8Length)) - 1);
+    for (remaining = utf8Length - 1; remaining > 0; remaining--) {
+        JS_ASSERT((*p & 0xC0) == 0x80);
+        result = (result << 6) | (*p++ & 0x3F);
+    }
+    return result;
+}
--- a/js/src/jsstr.h
+++ b/js/src/jsstr.h
@ -122,11 +122,39 @@ typedef enum JSCharType {
 			   (1 << JSCT_DECIMAL_DIGIT_NUMBER))                  \
 			  >> JS_CTYPE(c)) & 1)

+/*
+ * A unicode letter, suitable for use in an identifier.
+ *
+ * Builds a mask with one bit set per accepted category (upper, lower,
+ * titlecase, modifier and other letters, plus letter numbers), shifts it
+ * right by JS_CTYPE(c) and tests the low bit, so the result is 1 when c's
+ * category is in the set and 0 otherwise.
+ */
+#define JS_ISUC_LETTER(c)   ((((1 << JSCT_UPPERCASE_LETTER) |                 \
+			   (1 << JSCT_LOWERCASE_LETTER) |                     \
+			   (1 << JSCT_TITLECASE_LETTER) |                     \
+			   (1 << JSCT_MODIFIER_LETTER) |                      \
+			   (1 << JSCT_OTHER_LETTER) |                         \
+			   (1 << JSCT_LETTER_NUMBER))                         \
+			  >> JS_CTYPE(c)) & 1)
+
+/*
+* 'IdentifierPart' from ECMA grammar, is Unicode letter or
+* combining mark or digit or connector punctuation.
+*
+* Same mask-and-shift test as JS_ISUC_LETTER: the letter categories plus
+* non-spacing marks, combining spacing marks, decimal digits and connector
+* punctuation.  Yields 1 when JS_CTYPE(c) is one of them, else 0.
+*/
+#define JS_ISID_PART(c) ((((1 << JSCT_UPPERCASE_LETTER) |                     \
+			   (1 << JSCT_LOWERCASE_LETTER) |                     \
+			   (1 << JSCT_TITLECASE_LETTER) |                     \
+			   (1 << JSCT_MODIFIER_LETTER) |                      \
+			   (1 << JSCT_OTHER_LETTER) |                         \
+			   (1 << JSCT_LETTER_NUMBER) |                        \
+			   (1 << JSCT_NON_SPACING_MARK) |                     \
+			   (1 << JSCT_COMBINING_SPACING_MARK) |               \
+			   (1 << JSCT_DECIMAL_DIGIT_NUMBER) |                 \
+			   (1 << JSCT_CONNECTOR_PUNCTUATION))                 \
+			  >> JS_CTYPE(c)) & 1)
+
+/*
+ * Unicode control-format characters, ignored in input.
+ * Yields 1 when JS_CTYPE(c) is JSCT_FORMAT, else 0; the scanner's GetChar
+ * keeps reading while this is true for the character it fetched.
+ */
+#define JS_ISFORMAT(c) (((1 << JSCT_FORMAT) >> JS_CTYPE(c)) & 1)
+
 #define JS_ISWORD(c)    (JS_ISALNUM(c) || (c) == '_')

 /* XXXbe unify on A/X/Y tbls, avoid ctype.h? */
-#define JS_ISIDENT(c)   ((c) < 128 && (isalpha(c) || (c) == '_' || (c) == '$'))
-#define JS_ISIDENT2(c)  ((c) < 128 && (isalnum(c) || (c) == '_' || (c) == '$'))
+/* True if c may begin an identifier: a Unicode letter, '_' or '$'. */
+#define JS_ISIDENT_START(c) (JS_ISUC_LETTER(c) || (c) == '_' || (c) == '$')
+/*
+ * True if c may continue an identifier: any JS_ISID_PART character, '_' or
+ * '$'.  Both macros mention c more than once, so avoid arguments with side
+ * effects.
+ */
+#define JS_ISIDENT(c)       (JS_ISID_PART(c) || (c) == '_' || (c) == '$')

 #define JS_ISDIGIT(c)   (JS_CTYPE(c) == JSCT_DECIMAL_DIGIT_NUMBER)