Tweak E4X auto-detection to cope with more HTML comment madness (310993, r=mrbkap).

2005-10-04 06:48:32 +00:00 · 2005-10-04 06:48:32 +00:00 · 57373235e5
--- a/js/src/jsparse.c
+++ b/js/src/jsparse.c
@ -1047,9 +1047,9 @@ Statements(JSContext *cx, JSTokenStream *ts, JSTreeContext *tc)
        return NULL;
    PN_INIT_LIST(pn);

-    ts->flags |= TSF_OPERAND;
+    ts->flags |= TSF_OPERAND | TSF_START_STATEMENT;
    while ((tt = js_PeekToken(cx, ts)) > TOK_EOF && tt != TOK_RC) {
-        ts->flags &= ~TSF_OPERAND;
+        ts->flags &= ~(TSF_OPERAND | TSF_START_STATEMENT);
        pn2 = Statement(cx, ts, tc);
        if (!pn2) {
            if (ts->flags & TSF_EOF)
@ -1091,7 +1091,7 @@ Statements(JSContext *cx, JSTokenStream *ts, JSTreeContext *tc)
            PN_APPEND(pn, pn2);
        }
    }
-    ts->flags &= ~TSF_OPERAND;
+    ts->flags &= ~(TSF_OPERAND | TSF_START_STATEMENT);
    if (tt == TOK_ERROR)
        return NULL;

@ -1265,9 +1265,9 @@ Statement(JSContext *cx, JSTokenStream *ts, JSTreeContext *tc)

    CHECK_RECURSION();

-    ts->flags |= TSF_OPERAND;
+    ts->flags |= TSF_OPERAND | TSF_START_STATEMENT;
    tt = js_GetToken(cx, ts);
-    ts->flags &= ~TSF_OPERAND;
+    ts->flags &= ~(TSF_OPERAND | TSF_START_STATEMENT);

 #if JS_HAS_GETTER_SETTER
    if (tt == TOK_NAME) {
--- a/js/src/jsscan.c
+++ b/js/src/jsscan.c
@ -1636,12 +1636,22 @@ retry:
      case '<':
 #if JS_HAS_XML_SUPPORT
        /*
-         * XXX Use TSF_DIRTYLINE for now rather than TSF_DIRTYINPUT, because
-         * believe it or not, some .js files included via <script src="...">
-         * actually contain HTML comment-hiding hacks.
+         * After much testing, it's clear that Postel's advice to protocol
+         * designers ("be liberal in what you accept, and conservative in what
+         * you send") invites a natural-law repercussion for JS as "protocol":
+         *
+         * "If you are liberal in what you accept, others will utterly fail to
+         *  be conservative in what they send."
+         *
+         * Which means you will get <!-- comments to end of line in the middle
+         * of .js files, and after if conditions whose then statements are on
+         * the next line, and other wonders.  See at least the following bugs:
+         * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
+         * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
+         * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
         */
        if ((ts->flags & TSF_OPERAND) &&
-            (JS_HAS_XML_OPTION(cx) || (ts->flags & TSF_DIRTYLINE))) {
+            (JS_HAS_XML_OPTION(cx) || !(ts->flags & TSF_START_STATEMENT))) {
            /* Check for XML comment or CDATA section. */
            if (MatchChar(ts, '!')) {
                INIT_TOKENBUF();
@ -1754,7 +1764,7 @@ retry:
        if (MatchChar(ts, '!')) {
            if (MatchChar(ts, '-')) {
                if (MatchChar(ts, '-')) {
-                    ts->flags |= TSF_INHTMLCOMMENT;
+                    ts->flags |= TSF_IN_HTML_COMMENT;
                    goto skipline;
                }
                UngetChar(ts, '-');
@ -1855,10 +1865,10 @@ retry:

 skipline:
            /* Optimize line skipping if we are not in an HTML comment. */
-            if (ts->flags & TSF_INHTMLCOMMENT) {
+            if (ts->flags & TSF_IN_HTML_COMMENT) {
                while ((c = GetChar(ts)) != EOF && c != '\n') {
                    if (c == '-' && MatchChar(ts, '-') && MatchChar(ts, '>'))
-                        ts->flags &= ~(TSF_DIRTYINPUT | TSF_INHTMLCOMMENT);
+                        ts->flags &= ~TSF_IN_HTML_COMMENT;
                }
            } else {
                while ((c = GetChar(ts)) != EOF && c != '\n')
@ -1992,13 +2002,7 @@ skipline:
            tt = TOK_ASSIGN;
        } else if (MatchChar(ts, c)) {
            if (PeekChar(ts) == '>' && !(ts->flags & TSF_DIRTYLINE)) {
-                /*
-                 * Clear TSF_DIRTYINPUT as well as TSF_INHTMLCOMMENT, just
-                 * in case another HTML comment hiding hack follows this one.
-                 * It's unusual to have more than one per <script> content,
-                 * but possible.
-                 */
-                ts->flags &= ~(TSF_DIRTYINPUT | TSF_INHTMLCOMMENT);
+                ts->flags &= ~TSF_IN_HTML_COMMENT;
                goto skipline;
            }
            tt = TOK_DEC;
@ -2067,7 +2071,7 @@ skipline:

 out:
    JS_ASSERT(tt != TOK_EOL);
-    ts->flags |= TSF_DIRTYLINE | TSF_DIRTYINPUT;
+    ts->flags |= TSF_DIRTYLINE;

 eol_out:
    if (!STRING_BUFFER_OK(&ts->tokenbuf))
--- a/js/src/jsscan.h
+++ b/js/src/jsscan.h
@ -252,19 +252,19 @@ struct JSTokenStream {
 #define TSF_UNEXPECTED_EOF 0x1000

 /*
- * Non-whitespace since start of input, or since end of last HTML end-comment.
+ * Start of statement flag, set when getting the first token in a statement.
 * This is used to disambiguate an XML comment from the ancient Netscape-2-era
 * HTML comment hiding hack.
 */
-#define TSF_DIRTYINPUT 0x2000
+#define TSF_START_STATEMENT 0x2000

 /*
 * To handle the hard case of contiguous HTML comments, we want to clear the
 * TSF_DIRTYINPUT flag at the end of each such comment.  But we'd rather not
 * scan for --> within every //-style comment unless we have to.  So we set
- * the TSF_INHTMLCOMMENT flag when a <!-- is scanned as an HTML begin-comment,
- * and clear it (and TSF_DIRTYINPUT) when we scan --> either on a clean line,
- * or -- only if (ts->flags & TSF_INHTMLCOMMENT) -- in a //-style comment.
+ * TSF_IN_HTML_COMMENT when a <!-- is scanned as an HTML begin-comment, and
+ * clear it when we scan --> either on a clean line, or -- only if
+ * (ts->flags & TSF_IN_HTML_COMMENT) -- in a //-style comment.
 *
 * This still works as before given a malformed comment hiding hack such as:
 *
@ -277,7 +277,7 @@ struct JSTokenStream {
 * It does not cope with malformed comment hiding hacks where --> is hidden
 * by C-style comments, or on a dirty line.  Such cases are already broken.
 */
-#define TSF_INHTMLCOMMENT 0x4000
+#define TSF_IN_HTML_COMMENT 0x4000

 /* Unicode separators that are treated as line terminators, in addition to \n, \r */
 #define LINE_SEPARATOR  0x2028