From e7db24f8d5d284a2f763c9a89382bf56471f6460 Mon Sep 17 00:00:00 2001
From: Brian Crowder <crowder@fiverocks.com>
Date: Thu, 17 Jul 2008 16:58:06 -0400
Subject: [PATCH] Bug 430740 - BOM characters are stripped from javascript
 before execution r=igor

---
 js/src/jsscan.cpp | 273 +++++++++++++++++++++++-----------------------
 1 file changed, 138 insertions(+), 135 deletions(-)

diff --git a/js/src/jsscan.cpp b/js/src/jsscan.cpp
index 93eb56b13aa..3320f6ff08d 100644
--- a/js/src/jsscan.cpp
+++ b/js/src/jsscan.cpp
@@ -298,150 +298,144 @@ GetChar(JSTokenStream *ts)
     if (ts->ungetpos != 0) {
         c = ts->ungetbuf[--ts->ungetpos];
     } else {
-        do {
-            if (ts->linebuf.ptr == ts->linebuf.limit) {
-                len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
+        if (ts->linebuf.ptr == ts->linebuf.limit) {
+            len = PTRDIFF(ts->userbuf.limit, ts->userbuf.ptr, jschar);
+            if (len <= 0) {
+                if (!ts->file) {
+                    ts->flags |= TSF_EOF;
+                    return EOF;
+                }
+        
+                /* Fill ts->userbuf so that \r and \r\n convert to \n. */
+                crflag = (ts->flags & TSF_CRFLAG) != 0;
+                len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
                 if (len <= 0) {
-                    if (!ts->file) {
-                        ts->flags |= TSF_EOF;
-                        return EOF;
-                    }
-            
-                    /* Fill ts->userbuf so that \r and \r\n convert to \n. */
-                    crflag = (ts->flags & TSF_CRFLAG) != 0;
-                    len = js_fgets(cbuf, JS_LINE_LIMIT - crflag, ts->file);
-                    if (len <= 0) {
-                        ts->flags |= TSF_EOF;
-                        return EOF;
-                    }
-                    olen = len;
-                    ubuf = ts->userbuf.base;
-                    i = 0;
-                    if (crflag) {
-                        ts->flags &= ~TSF_CRFLAG;
-                        if (cbuf[0] != '\n') {
-                            ubuf[i++] = '\n';
-                            len++;
-                            ts->linepos--;
-                        }
-                    }
-                    for (j = 0; i < len; i++, j++)
-                        ubuf[i] = (jschar) (unsigned char) cbuf[j];
-                    ts->userbuf.limit = ubuf + len;
-                    ts->userbuf.ptr = ubuf;
+                    ts->flags |= TSF_EOF;
+                    return EOF;
                 }
-                if (ts->listener) {
-                    ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
-                                 &ts->listenerTSData, ts->listenerData);
-                }
-            
-                nl = ts->saveEOL;
-                if (!nl) {
-                    /*
-                     * Any one of \n, \r, or \r\n ends a line (the longest
-                     * match wins).  Also allow the Unicode line and paragraph
-                     * separators.
-                     */
-                    for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
-                        /*
-                         * Try to prevent value-testing on most characters by
-                         * filtering out characters that aren't 000x or 202x.
-                         */
-                        if ((*nl & 0xDFD0) == 0) {
-                            if (*nl == '\n')
-                                break;
-                            if (*nl == '\r') {
-                                if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
-                                    nl++;
-                                break;
-                            }
-                            if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
-                                break;
-                        }
-                    }
-                }
-            
-                /*
-                 * If there was a line terminator, copy thru it into linebuf.
-                 * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
-                 */
-                if (nl < ts->userbuf.limit)
-                    len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
-                if (len >= JS_LINE_LIMIT) {
-                    len = JS_LINE_LIMIT - 1;
-                    ts->saveEOL = nl;
-                } else {
-                    ts->saveEOL = NULL;
-                }
-                js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
-                ts->userbuf.ptr += len;
                 olen = len;
-            
+                ubuf = ts->userbuf.base;
+                i = 0;
+                if (crflag) {
+                    ts->flags &= ~TSF_CRFLAG;
+                    if (cbuf[0] != '\n') {
+                        ubuf[i++] = '\n';
+                        len++;
+                        ts->linepos--;
+                    }
+                }
+                for (j = 0; i < len; i++, j++)
+                    ubuf[i] = (jschar) (unsigned char) cbuf[j];
+                ts->userbuf.limit = ubuf + len;
+                ts->userbuf.ptr = ubuf;
+            }
+            if (ts->listener) {
+                ts->listener(ts->filename, ts->lineno, ts->userbuf.ptr, len,
+                             &ts->listenerTSData, ts->listenerData);
+            }
+        
+            nl = ts->saveEOL;
+            if (!nl) {
                 /*
-                 * Make sure linebuf contains \n for EOL (don't do this in
-                 * userbuf because the user's string might be readonly).
+                 * Any one of \n, \r, or \r\n ends a line (the longest
+                 * match wins).  Also allow the Unicode line and paragraph
+                 * separators.
                  */
-                if (nl < ts->userbuf.limit) {
-                    if (*nl == '\r') {
-                        if (ts->linebuf.base[len-1] == '\r') {
-                            /*
-                             * Does the line segment end in \r?  We must check
-                             * for a \n at the front of the next segment before
-                             * storing a \n into linebuf.  This case matters
-                             * only when we're reading from a file.
-                             */
-                            if (nl + 1 == ts->userbuf.limit && ts->file) {
-                                len--;
-                                ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
-                                if (len == 0) {
-                                    /*
-                                     * This can happen when a segment ends in
-                                     * \r\r.  Start over.  ptr == limit in this
-                                     * case, so we'll fall into buffer-filling
-                                     * code.
-                                     */
-                                    return GetChar(ts);
-                                }
-                            } else {
-                                ts->linebuf.base[len-1] = '\n';
-                            }
+                for (nl = ts->userbuf.ptr; nl < ts->userbuf.limit; nl++) {
+                    /*
+                     * Try to prevent value-testing on most characters by
+                     * filtering out characters that aren't 000x or 202x.
+                     */
+                    if ((*nl & 0xDFD0) == 0) {
+                        if (*nl == '\n')
+                            break;
+                        if (*nl == '\r') {
+                            if (nl + 1 < ts->userbuf.limit && nl[1] == '\n')
+                                nl++;
+                            break;
                         }
-                    } else if (*nl == '\n') {
-                        if (nl > ts->userbuf.base &&
-                            nl[-1] == '\r' &&
-                            ts->linebuf.base[len-2] == '\r') {
+                        if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR)
+                            break;
+                    }
+                }
+            }
+        
+            /*
+             * If there was a line terminator, copy thru it into linebuf.
+             * Else copy JS_LINE_LIMIT-1 bytes into linebuf.
+             */
+            if (nl < ts->userbuf.limit)
+                len = PTRDIFF(nl, ts->userbuf.ptr, jschar) + 1;
+            if (len >= JS_LINE_LIMIT) {
+                len = JS_LINE_LIMIT - 1;
+                ts->saveEOL = nl;
+            } else {
+                ts->saveEOL = NULL;
+            }
+            js_strncpy(ts->linebuf.base, ts->userbuf.ptr, len);
+            ts->userbuf.ptr += len;
+            olen = len;
+        
+            /*
+             * Make sure linebuf contains \n for EOL (don't do this in
+             * userbuf because the user's string might be readonly).
+             */
+            if (nl < ts->userbuf.limit) {
+                if (*nl == '\r') {
+                    if (ts->linebuf.base[len-1] == '\r') {
+                        /*
+                         * Does the line segment end in \r?  We must check
+                         * for a \n at the front of the next segment before
+                         * storing a \n into linebuf.  This case matters
+                         * only when we're reading from a file.
+                         */
+                        if (nl + 1 == ts->userbuf.limit && ts->file) {
                             len--;
-                            JS_ASSERT(ts->linebuf.base[len] == '\n');
+                            ts->flags |= TSF_CRFLAG; /* clear NLFLAG? */
+                            if (len == 0) {
+                                /*
+                                 * This can happen when a segment ends in
+                                 * \r\r.  Start over.  ptr == limit in this
+                                 * case, so we'll fall into buffer-filling
+                                 * code.
+                                 */
+                                return GetChar(ts);
+                            }
+                        } else {
                             ts->linebuf.base[len-1] = '\n';
                         }
-                    } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
+                    }
+                } else if (*nl == '\n') {
+                    if (nl > ts->userbuf.base &&
+                        nl[-1] == '\r' &&
+                        ts->linebuf.base[len-2] == '\r') {
+                        len--;
+                        JS_ASSERT(ts->linebuf.base[len] == '\n');
                         ts->linebuf.base[len-1] = '\n';
                     }
+                } else if (*nl == LINE_SEPARATOR || *nl == PARA_SEPARATOR) {
+                    ts->linebuf.base[len-1] = '\n';
                 }
-            
-                /* Reset linebuf based on adjusted segment length. */
-                ts->linebuf.limit = ts->linebuf.base + len;
-                ts->linebuf.ptr = ts->linebuf.base;
-            
-                /* Update position of linebuf within physical userbuf line. */
-                if (!(ts->flags & TSF_NLFLAG))
-                    ts->linepos += ts->linelen;
-                else
-                    ts->linepos = 0;
-                if (ts->linebuf.limit[-1] == '\n')
-                    ts->flags |= TSF_NLFLAG;
-                else
-                    ts->flags &= ~TSF_NLFLAG;
-            
-                /* Update linelen from original segment length. */
-                ts->linelen = olen;
             }
-            c = *ts->linebuf.ptr++;
-        /*
-         * In the hopes of being liberal in what we accept, we toss out little-
-         * and big-endian byte order markers here, see bug 368516.
-         */
-        } while (c == 0xfffe || c == 0xfeff);
+        
+            /* Reset linebuf based on adjusted segment length. */
+            ts->linebuf.limit = ts->linebuf.base + len;
+            ts->linebuf.ptr = ts->linebuf.base;
+        
+            /* Update position of linebuf within physical userbuf line. */
+            if (!(ts->flags & TSF_NLFLAG))
+                ts->linepos += ts->linelen;
+            else
+                ts->linepos = 0;
+            if (ts->linebuf.limit[-1] == '\n')
+                ts->flags |= TSF_NLFLAG;
+            else
+                ts->flags &= ~TSF_NLFLAG;
+        
+            /* Update linelen from original segment length. */
+            ts->linelen = olen;
+        }
+        c = *ts->linebuf.ptr++;
     }
     if (c == '\n')
         ts->lineno++;
@@ -990,6 +984,15 @@ NewToken(JSTokenStream *ts, ptrdiff_t adjust)
     return tp;
 }
 
+static JS_INLINE JSBool
+ScanAsSpace(jschar c)
+{
+    /*
+     * For compatibility, a BOM in either byte order scans as whitespace.
+     */
+    return (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff) ? JS_TRUE : JS_FALSE;
+}
+
 JSTokenType
 js_GetToken(JSContext *cx, JSTokenStream *ts)
 {
@@ -1200,7 +1203,7 @@ retry:
             if (ts->flags & TSF_NEWLINES)
                 break;
         }
-    } while (JS_ISSPACE(c));
+    } while (ScanAsSpace(c));
 
     tp = NewToken(ts, -1);
     if (c == EOF) {
@@ -1722,7 +1725,7 @@ retry:
                     cp[3] == 'n' &&
                     cp[4] == 'e') {
                     SkipChars(ts, 5);
-                    while ((c = GetChar(ts)) != '\n' && JS_ISSPACE(c))
+                    while ((c = GetChar(ts)) != '\n' && ScanAsSpace(c))
                         continue;
                     if (JS7_ISDEC(c)) {
                         line = JS7_UNDEC(c);
@@ -1734,7 +1737,7 @@ retry:
                             }
                             line = temp;
                         }
-                        while (c != '\n' && JS_ISSPACE(c))
+                        while (c != '\n' && ScanAsSpace(c))
                             c = GetChar(ts);
                         i = 0;
                         if (c == '"') {
@@ -1749,7 +1752,7 @@ retry:
                             }
                             if (c == '"') {
                                 while ((c = GetChar(ts)) != '\n' &&
-                                       JS_ISSPACE(c)) {
+                                       ScanAsSpace(c)) {
                                     continue;
                                 }
                             }