Fixing http://bugzilla.mozilla.org/show_bug.cgi?id=179068

Allow char sequences exceeding 64K when storing source for decompilation. The current 64K limit for string literals comes from omj/Parser.java, where it constructs the internal script representation for future decompilation. The patch extends this form to allow string sequences with more than 64K characters and modifies the decompilation code in omj/NativeFunction.java accordingly.
2002-11-12 09:24:54 +00:00 · 2002-11-12 09:24:54 +00:00 · 99c0da59df
--- a/js/rhino/src/org/mozilla/javascript/NativeFunction.java
+++ b/js/rhino/src/org/mozilla/javascript/NativeFunction.java
@ -78,17 +78,19 @@ public class NativeFunction extends BaseFunction {
     */

    public String decompile(Context cx, int indent, boolean justbody) {
+        Object[] srcData = new Object[1];
        StringBuffer result = new StringBuffer();
-        decompile_r(this, indent, true, justbody, result);
+        decompile_r(this, indent, true, justbody, srcData, result);
        return result.toString();

    }

    private static void decompile_r(NativeFunction f, int indent,
                                    boolean toplevel, boolean justbody,
-                                    StringBuffer result)
+                                    Object[] srcData, StringBuffer result)
    {
        String source = f.source;
+
        if (source == null) {
            if (!justbody) {
                result.append("function ");
@ -129,17 +131,10 @@ public class NativeFunction extends BaseFunction {

        int i = 0;

-        if (length > 0) {
-            /* special-case FUNCTION as the first token; if it is,
-             * (and it's not followed by a NAME or LP) then we're
-             * decompiling a function (and not the toplevel script.)
-
-             * FUNCTION appearing elsewhere is an escape that means we'll
-             * need to call toString of the given function (object).
-
-             * If not at the top level, don't add an initial indent;
-             * let the caller do it, so functions as expressions look
-             * reasonable.  */
+        if (length != 0) {
+            // If the first token is TokenStream.SCRIPT, then we're
+            // decompiling the toplevel script; otherwise it is a function
+            // and should start with TokenStream.FUNCTION

            if (toplevel) {
                // add an initial newline to exactly match js.
@ -149,13 +144,9 @@ public class NativeFunction extends BaseFunction {
                    result.append(' ');
            }

-            if (source.charAt(0) == TokenStream.FUNCTION
-                // make sure it's not a script that begins with a
-                // reference to a function definition.
-                && length > 1
-                && (source.charAt(1) == TokenStream.NAME
-                    || source.charAt(1) == TokenStream.LP))
-            {
+            int token = source.charAt(i);
+            ++i;
+            if (token == TokenStream.FUNCTION) {
                if (!justbody) {
                    result.append("function ");

@ -168,33 +159,42 @@ public class NativeFunction extends BaseFunction {
                     * less than 1.2... or if it's greater than 1.2, because
                     * we need to be closer to ECMA.  (ToSource, please?)
                     */
-                    if (nextIs(source, length, i, TokenStream.LP)
+                    if (source.charAt(i) == TokenStream.LP
                        && f.version != Context.VERSION_1_2
                        && f.functionName != null
                        && f.functionName.equals("anonymous"))
-                        result.append("anonymous");
-                    ++i;
-                } else {
-                    /* Skip past the entire function header to the next EOL.
-                     * Depends on how NAMEs are encoded.
-                     */
-                    while (i < length
-                           && (source.charAt(i) != TokenStream.EOL
-                               // the length char of a NAME sequence
-                               // can look like an EOL.
-                               || (i > 0
-                                   && source.charAt(i-1) == TokenStream.NAME)))
                    {
-                        ++i;
+                        result.append("anonymous");
+                    }
+                } else {
+                    // Skip the entire function header, up to and including the next EOL.
+                    skipLoop: for (;;) {
+                        token = source.charAt(i);
+                        ++i;
+                        switch (token) {
+                            case TokenStream.EOL:
+                                break skipLoop;
+                            case TokenStream.NAME:
+                                // Skip function or argument name
+                                i = Parser.getSourceString(source, i, null);
+                                break;
+                            case TokenStream.LP:
+                            case TokenStream.COMMA:
+                            case TokenStream.RP:
+                                break;
+                            default:
+                                // Bad function header
+                                throw new RuntimeException();
+                        }
                    }
-                    // Skip past the EOL, too.
-                    ++i;
                }
+            } else if (token != TokenStream.SCRIPT) {
+                // Bad source header
+                throw new RuntimeException();
            }
        }

        while (i < length) {
-            int stop;
            switch(source.charAt(i)) {
            case TokenStream.NAME:
            case TokenStream.REGEXP:  // re-wrapped in '/'s in parser...
@ -205,50 +205,23 @@ public class NativeFunction extends BaseFunction {
                 * Also change function-header skipping code above,
                 * used when decompling under decompileFunctionBody.
                 */
-                ++i;
-                stop = i + (int)source.charAt(i);
-                result.append(source.substring(i + 1, stop + 1));
-                i = stop;
-                break;
+                i = Parser.getSourceString(source, i + 1, srcData);
+                result.append((String)srcData[0]);
+                continue;

-            case TokenStream.NUMBER:
-                ++i;
-                long lbits = 0;
-                switch(source.charAt(i)) {
-                case 'S':
-                    ++i;
-                    result.append((int)source.charAt(i));
-                    break;
-
-                case 'J':
-                    lbits |= (long)source.charAt(++i) << 48;
-                    lbits |= (long)source.charAt(++i) << 32;
-                    lbits |= (long)source.charAt(++i) << 16;
-                    lbits |= (long)source.charAt(++i);
-
-                    result.append(lbits);
-                    break;
-                case 'D':
-                    lbits |= (long)source.charAt(++i) << 48;
-                    lbits |= (long)source.charAt(++i) << 32;
-                    lbits |= (long)source.charAt(++i) << 16;
-                    lbits |= (long)source.charAt(++i);
-
-                    double dval = Double.longBitsToDouble(lbits);
-                    result.append(ScriptRuntime.numberToString(dval, 10));
-                    break;
-                }
-                break;
+            case TokenStream.NUMBER: {
+                i = Parser.getSourceNumber(source, i + 1, srcData);
+                double number = ((Number)srcData[0]).doubleValue();
+                result.append(ScriptRuntime.numberToString(number, 10));
+                continue;
+            }

            case TokenStream.STRING:
-                ++i;
-                stop = i + (int)source.charAt(i);
+                i = Parser.getSourceString(source, i + 1, srcData);
                result.append('"');
-                result.append(ScriptRuntime.escapeString
-                              (source.substring(i + 1, stop + 1)));
+                result.append(ScriptRuntime.escapeString((String)srcData[0]));
                result.append('"');
-                i = stop;
-                break;
+                continue;

            case TokenStream.PRIMARY:
                ++i;
@ -307,7 +280,7 @@ public class NativeFunction extends BaseFunction {
                    throw Context.reportRuntimeError(message);
                }
                decompile_r(f.nestedFunctions[functionNumber], indent,
-                            false, false, result);
+                            false, false, srcData, result);
                break;
            }
            case TokenStream.COMMA:
@ -367,19 +340,21 @@ public class NativeFunction extends BaseFunction {
                 */
                if (i + 1 < length) {
                    int less = 0;
-                    if (nextIs(source, length, i, TokenStream.CASE)
-                        || nextIs(source, length, i, TokenStream.DEFAULT))
+                    int nextToken = source.charAt(i + 1);
+                    if (nextToken == TokenStream.CASE
+                        || nextToken == TokenStream.DEFAULT)
                        less = SETBACK;
-                    else if (nextIs(source, length, i, TokenStream.RC))
+                    else if (nextToken == TokenStream.RC)
                        less = OFFSET;

                    /* elaborate check against label... skip past a
                     * following inlined NAME and look for a COLON.
                     * Depends on how NAME is encoded.
                     */
-                    else if (nextIs(source, length, i, TokenStream.NAME)) {
-                        int skip = source.charAt(i + 2);
-                        if (source.charAt(i + skip + 3) == TokenStream.COLON)
+                    else if (nextToken == TokenStream.NAME) {
+                        int afterName = Parser.getSourceString(source, i + 2,
+                                                               null);
+                        if (source.charAt(afterName) == TokenStream.COLON)
                            less = OFFSET;
                    }

--- a/js/rhino/src/org/mozilla/javascript/Parser.java
+++ b/js/rhino/src/org/mozilla/javascript/Parser.java
@ -104,6 +104,9 @@ class Parser {
         * we've collected all the source */
        Object tempBlock = nf.createLeaf(TokenStream.BLOCK);

+        // Add script indicator
+        sourceAdd((char)ts.SCRIPT);
+
        while (true) {
            ts.flags |= ts.TSF_REGEXP;
            tt = ts.getToken();
@ -1462,17 +1465,43 @@ class Parser {
    }

    private void sourceAddString(int type, String str) {
-        int L = str.length();
-        // java string length < 2^16?
-        if (Context.check && L > Character.MAX_VALUE) Context.codeBug();
-
-        if (sourceTop + L + 2 > sourceBuffer.length) {
-            increaseSourceCapacity(sourceTop + L + 2);
-        }
        sourceAdd((char)type);
-        sourceAdd((char)L);
+        sourceAddString(str);
+    }
+
+    private void sourceAddString(String str) {
+        int L = str.length();
+        int lengthEncodingSize = 1;
+        if (L >= 0x8000) {
+            lengthEncodingSize = 2;
+        }
+        int nextTop = sourceTop + lengthEncodingSize + L;
+        if (nextTop > sourceBuffer.length) {
+            increaseSourceCapacity(nextTop);
+        }
+        if (L >= 0x8000) {
+            // Use 2 chars to encode lengths of 32K or more, where the highest
+            // bit in the first char indicates the presence of a second char
+            sourceBuffer[sourceTop] = (char)(0x8000 | (L >>> 16));
+            ++sourceTop;
+        }
+        sourceBuffer[sourceTop] = (char)L;
+        ++sourceTop;
        str.getChars(0, L, sourceBuffer, sourceTop);
-        sourceTop += L;
+        sourceTop = nextTop;
+    }
+
+    static int getSourceString(String source, int offset, Object[] result) {
+        int length = source.charAt(offset);
+        ++offset;
+        if ((0x8000 & length) != 0) {
+            length = ((0x7FFF & length) << 16) | source.charAt(offset);
+            ++offset;
+        }
+        if (result != null) {
+            result[0] = source.substring(offset, offset + length);
+        }
+        return offset + length;
    }

    private void sourceAddNumber(double n) {
@ -1527,6 +1556,38 @@ class Parser {
        }
    }

+    static int getSourceNumber(String source, int offset, Object[] result) {
+        char type = source.charAt(offset);
+        ++offset;
+        if (type == 'S') {
+            if (result != null) {
+                int ival = source.charAt(offset);
+                result[0] = new Integer(ival);
+            }
+            ++offset;
+        } else if (type == 'J' || type == 'D') {
+            if (result != null) {
+                long lbits;
+                lbits = (long)source.charAt(offset) << 48;
+                lbits |= (long)source.charAt(offset + 1) << 32;
+                lbits |= (long)source.charAt(offset + 2) << 16;
+                lbits |= (long)source.charAt(offset + 3);
+                double dval;
+                if (type == 'J') {
+                    dval = lbits;
+                } else {
+                    dval = Double.longBitsToDouble(lbits);
+                }
+                result[0] = new Double(dval);
+            }
+            offset += 4;
+        } else {
+            // Bad source
+            throw new RuntimeException();
+        }
+        return offset;
+    }
+
    private void increaseSourceCapacity(int minimalCapacity) {
        // Call this only when capacity increase is must
        if (Context.check && minimalCapacity <= sourceBuffer.length)