gecko-dev/toolkit/devtools/pretty-fast/pretty-fast.js

/* -*- indent-tabs-mode: nil; js-indent-level: 2; fill-column: 80 -*- */
/*
 * Copyright 2013 Mozilla Foundation and contributors
 * Licensed under the New BSD license. See LICENSE.md or:
 * http://opensource.org/licenses/BSD-2-Clause
 */
(function (root, factory) {
  if (typeof define === 'function' && define.amd) {
    define(factory);
  } else if (typeof exports === 'object') {
    module.exports = factory();
  } else {
    root.prettyFast = factory();
  }
}(this, function () {
  "use strict";

  var acorn = this.acorn || require("acorn/acorn");
  var sourceMap = this.sourceMap || require("source-map");
  var SourceNode = sourceMap.SourceNode;

  // If any of these tokens are seen before a "[" token, we know that "[" token
  // is the start of an array literal, rather than a property access.
  //
  // The only exception is "}", which would need to be disambiguated by
  // parsing. The majority of the time, an open bracket following a closing
  // curly is going to be an array literal, so we brush the complication under
  // the rug, and handle the ambiguity by always assuming that it will be an
  // array literal.
  var PRE_ARRAY_LITERAL_TOKENS = {
    "typeof": true,
    "void": true,
    "delete": true,
    "case": true,
    "do": true,
    "=": true,
    "in": true,
    "{": true,
    "*": true,
    "/": true,
    "%": true,
    "else": true,
    ";": true,
    "++": true,
    "--": true,
    "+": true,
    "-": true,
    "~": true,
    "!": true,
    ":": true,
    "?": true,
    ">>": true,
    ">>>": true,
    "<<": true,
    "||": true,
    "&&": true,
    "<": true,
    ">": true,
    "<=": true,
    ">=": true,
    "instanceof": true,
    "&": true,
    "^": true,
    "|": true,
    "==": true,
    "!=": true,
    "===": true,
    "!==": true,
    ",": true,

    "}": true
  };

  /**
   * Determines if we think that the given token starts an array literal.
   *
   * @param Object token
   *        The token we want to determine if it is an array literal.
   * @param Object lastToken
   *        The last token we added to the pretty printed results.
   *
   * @returns Boolean
   *          True if we believe it is an array literal, false otherwise.
   */
  function isArrayLiteral(token, lastToken) {
    if (token.type.type != "[") {
      return false;
    }
    if (!lastToken) {
      return true;
    }
    if (lastToken.type.isAssign) {
      return true;
    }
    return !!PRE_ARRAY_LITERAL_TOKENS[lastToken.type.keyword || lastToken.type.type];
  }

  // If any of these tokens are followed by a token on a new line, we know that
  // ASI cannot happen.
  var PREVENT_ASI_AFTER_TOKENS = {
    // Binary operators
    "*": true,
    "/": true,
    "%": true,
    "+": true,
    "-": true,
    "<<": true,
    ">>": true,
    ">>>": true,
    "<": true,
    ">": true,
    "<=": true,
    ">=": true,
    "instanceof": true,
    "in": true,
    "==": true,
    "!=": true,
    "===": true,
    "!==": true,
    "&": true,
    "^": true,
    "|": true,
    "&&": true,
    "||": true,
    ",": true,
    ".": true,
    "=": true,
    "*=": true,
    "/=": true,
    "%=": true,
    "+=": true,
    "-=": true,
    "<<=": true,
    ">>=": true,
    ">>>=": true,
    "&=": true,
    "^=": true,
    "|=": true,
    // Unary operators
    "delete": true,
    "void": true,
    "typeof": true,
    "~": true,
    "!": true,
    "new": true,
    // Function calls and grouped expressions
    "(": true
  };

  // If any of these tokens are on a line after the token before it, we know
  // that ASI cannot happen.
  var PREVENT_ASI_BEFORE_TOKENS = {
    // Binary operators
    "*": true,
    "/": true,
    "%": true,
    "<<": true,
    ">>": true,
    ">>>": true,
    "<": true,
    ">": true,
    "<=": true,
    ">=": true,
    "instanceof": true,
    "in": true,
    "==": true,
    "!=": true,
    "===": true,
    "!==": true,
    "&": true,
    "^": true,
    "|": true,
    "&&": true,
    "||": true,
    ",": true,
    ".": true,
    "=": true,
    "*=": true,
    "/=": true,
    "%=": true,
    "+=": true,
    "-=": true,
    "<<=": true,
    ">>=": true,
    ">>>=": true,
    "&=": true,
    "^=": true,
    "|=": true,
    // Function calls
    "(": true
  };

  /**
   * Determines if Automatic Semicolon Insertion (ASI) occurs between these
   * tokens.
   *
   * @param Object token
   *        The current token.
   * @param Object lastToken
   *        The last token we added to the pretty printed results.
   *
   * @returns Boolean
   *          True if we believe ASI occurs.
   */
  function isASI(token, lastToken) {
    if (!lastToken) {
      return false;
    }
    if (token.startLoc.line === lastToken.startLoc.line) {
      return false;
    }
    if (PREVENT_ASI_AFTER_TOKENS[lastToken.type.type || lastToken.type.keyword]) {
      return false;
    }
    if (PREVENT_ASI_BEFORE_TOKENS[token.type.type || token.type.keyword]) {
      return false;
    }
    return true;
  }

  /**
   * Determine if we have encountered a getter or setter.
   *
   * @param Object token
   *        The current token. If this is a getter or setter, it would be the
   *        property name.
   * @param Object lastToken
   *        The last token we added to the pretty printed results. If this is a
   *        getter or setter, it would be the `get` or `set` keyword
   *        respectively.
   * @param Array stack
   *        The stack of open parens/curlies/brackets/etc.
   *
   * @returns Boolean
   *          True if this is a getter or setter.
   */
  function isGetterOrSetter(token, lastToken, stack) {
    return stack[stack.length - 1] == "{"
      && lastToken
      && lastToken.type.type == "name"
      && (lastToken.value == "get" || lastToken.value == "set")
      && token.type.type == "name";
  }

  /**
   * Determine if we should add a newline after the given token.
   *
   * @param Object token
   *        The token we are looking at.
   * @param Array stack
   *        The stack of open parens/curlies/brackets/etc.
   *
   * @returns Boolean
   *          True if we should add a newline.
   */
  function isLineDelimiter(token, stack) {
    if (token.isArrayLiteral) {
      return true;
    }
    var ttt = token.type.type;
    var top = stack[stack.length - 1];
    return ttt == ";" && top != "("
      || ttt == "{"
      || ttt == "," && top != "("
      || ttt == ":" && (top == "case" || top == "default");
  }

  /**
   * Append the necessary whitespace to the result after we have added the given
   * token.
   *
   * @param Object token
   *        The token that was just added to the result.
   * @param Function write
   *        The function to write to the pretty printed results.
   * @param Array stack
   *        The stack of open parens/curlies/brackets/etc.
   *
   * @returns Boolean
   *          Returns true if we added a newline to result, false in all other
   *          cases.
   */
  function appendNewline(token, write, stack) {
    if (isLineDelimiter(token, stack)) {
      write("\n", token.startLoc.line, token.startLoc.column);
      return true;
    }
    return false;
  }

  /**
   * Determines if we need to add a space between the last token we added and
   * the token we are about to add.
   *
   * @param Object token
   *        The token we are about to add to the pretty printed code.
   * @param Object lastToken
   *        The last token added to the pretty printed code.
   */
  function needsSpaceAfter(token, lastToken) {
    if (lastToken) {
      if (lastToken.type.isLoop) {
        return true;
      }
      if (lastToken.type.isAssign) {
        return true;
      }
      if (lastToken.type.binop != null) {
        return true;
      }

      var ltt = lastToken.type.type;
      if (ltt == "?") {
        return true;
      }
      if (ltt == ":") {
        return true;
      }
      if (ltt == ",") {
        return true;
      }
      if (ltt == ";") {
        return true;
      }

      var ltk = lastToken.type.keyword;
      if (ltk != null) {
        if (ltk == "break" || ltk == "continue" || ltk == "return") {
          return token.type.type != ";";
        }
        if (ltk != "debugger"
            && ltk != "null"
            && ltk != "true"
            && ltk != "false"
            && ltk != "this"
            && ltk != "default") {
          return true;
        }
      }

      if (ltt == ")" && (token.type.type != ")"
                         && token.type.type != "]"
                         && token.type.type != ";"
                         && token.type.type != ","
                         && token.type.type != ".")) {
        return true;
      }
    }

    if (token.type.isAssign) {
      return true;
    }
    if (token.type.binop != null) {
      return true;
    }
    if (token.type.type == "?") {
      return true;
    }

    return false;
  }

  /**
   * Add the required whitespace before this token, whether that is a single
   * space, newline, and/or the indent on fresh lines.
   *
   * @param Object token
   *        The token we are about to add to the pretty printed code.
   * @param Object lastToken
   *        The last token we added to the pretty printed code.
   * @param Boolean addedNewline
   *        Whether we added a newline after adding the last token to the pretty
   *        printed code.
   * @param Function write
   *        The function to write pretty printed code to the result SourceNode.
   * @param Object options
   *        The options object.
   * @param Number indentLevel
   *        The number of indents deep we are.
   * @param Array stack
   *        The stack of open curlies, brackets, etc.
   */
  function prependWhiteSpace(token, lastToken, addedNewline, write, options,
                             indentLevel, stack) {
    var ttk = token.type.keyword;
    var ttt = token.type.type;
    var newlineAdded = addedNewline;
    var ltt = lastToken ? lastToken.type.type : null;

    // Handle whitespace and newlines after "}" here instead of in
    // `isLineDelimiter` because it is only a line delimiter some of the
    // time. For example, we don't want to put "else if" on a new line after
    // the first if's block.
    if (lastToken && ltt == "}") {
      if (ttk == "while" && stack[stack.length - 1] == "do") {
        write(" ",
              lastToken.startLoc.line,
              lastToken.startLoc.column);
      } else if (ttk == "else" ||
                 ttk == "catch" ||
                 ttk == "finally") {
        write(" ",
              lastToken.startLoc.line,
              lastToken.startLoc.column);
      } else if (ttt != "(" &&
                 ttt != ";" &&
                 ttt != "," &&
                 ttt != ")" &&
                 ttt != ".") {
        write("\n",
              lastToken.startLoc.line,
              lastToken.startLoc.column);
        newlineAdded = true;
      }
    }

    if (isGetterOrSetter(token, lastToken, stack)) {
      write(" ",
            lastToken.startLoc.line,
            lastToken.startLoc.column);
    }

    if (ttt == ":" && stack[stack.length - 1] == "?") {
      write(" ",
            lastToken.startLoc.line,
            lastToken.startLoc.column);
    }

    if (lastToken && ltt != "}" && ttk == "else") {
      write(" ",
            lastToken.startLoc.line,
            lastToken.startLoc.column);
    }

    function ensureNewline() {
      if (!newlineAdded) {
        write("\n",
              lastToken.startLoc.line,
              lastToken.startLoc.column);
        newlineAdded = true;
      }
    }

    if (isASI(token, lastToken)) {
      ensureNewline();
    }

    if (decrementsIndent(ttt, stack)) {
      ensureNewline();
    }

    if (newlineAdded) {
      if (ttk == "case" || ttk == "default") {
        write(repeat(options.indent, indentLevel - 1),
              token.startLoc.line,
              token.startLoc.column);
      } else {
        write(repeat(options.indent, indentLevel),
              token.startLoc.line,
              token.startLoc.column);
      }
    } else if (needsSpaceAfter(token, lastToken)) {
      write(" ",
            lastToken.startLoc.line,
            lastToken.startLoc.column);
    }
  }

  /**
   * Repeat the `str` string `n` times.
   *
   * @param String str
   *        The string to be repeated.
   * @param Number n
   *        The number of times to repeat the string.
   *
   * @returns String
   *          The repeated string.
   */
  function repeat(str, n) {
    var result = "";
    while (n > 0) {
      if (n & 1) {
        result += str;
      }
      n >>= 1;
      str += str;
    }
    return result;
  }

  /**
   * Make sure that we output the escaped character combination inside string literals
   * instead of various problematic characters.
   */
  var sanitize = (function () {
    var escapeCharacters = {
      // Backslash
      "\\": "\\\\",
      // Newlines
      "\n": "\\n",
      // Carriage return
      "\r": "\\r",
      // Tab
      "\t": "\\t",
      // Vertical tab
      "\v": "\\v",
      // Form feed
      "\f": "\\f",
      // Null character
      "\0": "\\0",
      // Single quotes
      "'": "\\'"
    };

    var regExpString = "("
      + Object.keys(escapeCharacters)
              .map(function (c) { return escapeCharacters[c]; })
              .join("|")
      + ")";
    var escapeCharactersRegExp = new RegExp(regExpString, "g");

    return function(str) {
      return str.replace(escapeCharactersRegExp, function (_, c) {
        return escapeCharacters[c];
      });
    }
  }());
  /**
   * Add the given token to the pretty printed results.
   *
   * @param Object token
   *        The token to add.
   * @param Function write
   *        The function to write pretty printed code to the result SourceNode.
   * @param Object options
   *        The options object.
   */
  function addToken(token, write, options) {
    if (token.type.type == "string") {
      write("'" + sanitize(token.value) + "'",
            token.startLoc.line,
            token.startLoc.column);
    } else {
      write(String(token.value != null ? token.value : token.type.type),
            token.startLoc.line,
            token.startLoc.column);
    }
  }

  /**
   * Returns true if the given token type belongs on the stack.
   */
  function belongsOnStack(token) {
    var ttt = token.type.type;
    var ttk = token.type.keyword;
    return ttt == "{"
      || ttt == "("
      || ttt == "["
      || ttt == "?"
      || ttk == "do"
      || ttk == "switch"
      || ttk == "case"
      || ttk == "default";
  }

  /**
   * Returns true if the given token should cause us to pop the stack.
   */
  function shouldStackPop(token, stack) {
    var ttt = token.type.type;
    var ttk = token.type.keyword;
    var top = stack[stack.length - 1];
    return ttt == "]"
      || ttt == ")"
      || ttt == "}"
      || (ttt == ":" && (top == "case" || top == "default" || top == "?"))
      || (ttk == "while" && top == "do");
  }

  /**
   * Returns true if the given token type should cause us to decrement the
   * indent level.
   */
  function decrementsIndent(tokenType, stack) {
    return tokenType == "}"
      || (tokenType == "]" && stack[stack.length - 1] == "[\n")
  }

  /**
   * Returns true if the given token should cause us to increment the indent
   * level.
   */
  function incrementsIndent(token) {
    return token.type.type == "{"
      || token.isArrayLiteral
      || token.type.keyword == "switch";
  }

  /**
   * Add a comment to the pretty printed code.
   *
   * @param Function write
   *        The function to write pretty printed code to the result SourceNode.
   * @param Number indentLevel
   *        The number of indents deep we are.
   * @param Object options
   *        The options object.
   * @param Boolean block
   *        True if the comment is a multiline block style comment.
   * @param String text
   *        The text of the comment.
   * @param Number line
   *        The line number to comment appeared on.
   * @param Number column
   *        The column number the comment appeared on.
   */
  function addComment(write, indentLevel, options, block, text, line, column) {
    var indentString = repeat(options.indent, indentLevel);

    write(indentString, line, column);
    if (block) {
      write("/*");
      write(text
            .split(new RegExp("/\n" + indentString + "/", "g"))
            .join("\n" + indentString));
      write("*/");
    } else {
      write("//");
      write(text);
    }
    write("\n");
  }

  /**
   * The main function.
   *
   * @param String input
   *        The ugly JS code we want to pretty print.
   * @param Object options
   *        The options object. Provides configurability of the pretty
   *        printing. Properties:
   *          - url: The URL string of the ugly JS code.
   *          - indent: The string to indent code by.
   *
   * @returns Object
   *          An object with the following properties:
   *            - code: The pretty printed code string.
   *            - map: A SourceMapGenerator instance.
   */
  return function prettyFast(input, options) {
    // The level of indents deep we are.
    var indentLevel = 0;

    // We will accumulate the pretty printed code in this SourceNode.
    var result = new SourceNode();

    /**
     * Write a pretty printed string to the result SourceNode.
     *
     * We buffer our writes so that we only create one mapping for each line in
     * the source map. This enhances performance by avoiding extraneous mapping
     * serialization, and flattening the tree that
     * `SourceNode#toStringWithSourceMap` will have to recursively walk. When
     * timing how long it takes to pretty print jQuery, this optimization
     * brought the time down from ~390 ms to ~190ms!
     *
     * @param String str
     *        The string to be added to the result.
     * @param Number line
     *        The line number the string came from in the ugly source.
     * @param Number column
     *        The column number the string came from in the ugly source.
     */
    var write = (function () {
      var buffer = [];
      var bufferLine = -1;
      var bufferColumn = -1;
      return function write(str, line, column) {
        if (line != null && bufferLine === -1) {
          bufferLine = line;
        }
        if (column != null && bufferColumn === -1) {
          bufferColumn = column;
        }
        buffer.push(str);

        if (str == "\n") {
          var lineStr = "";
          for (var i = 0, len = buffer.length; i < len; i++) {
            lineStr += buffer[i];
          }
          result.add(new SourceNode(bufferLine, bufferColumn, options.url, lineStr));
          buffer.splice(0, buffer.length);
          bufferLine = -1;
          bufferColumn = -1;
        }
      }
    }());

    // Whether or not we added a newline on after we added the last token.
    var addedNewline = false;

    // The current token we will be adding to the pretty printed code.
    var token;

    // Shorthand for token.type.type, so we don't have to repeatedly access
    // properties.
    var ttt;

    // Shorthand for token.type.keyword, so we don't have to repeatedly access
    // properties.
    var ttk;

    // The last token we added to the pretty printed code.
    var lastToken;

    // Stack of token types/keywords that can affect whether we want to add a
    // newline or a space. We can make that decision based on what token type is
    // on the top of the stack. For example, a comma in a parameter list should
    // be followed by a space, while a comma in an object literal should be
    // followed by a newline.
    //
    // Strings that go on the stack:
    //
    //   - "{"
    //   - "("
    //   - "["
    //   - "[\n"
    //   - "do"
    //   - "?"
    //   - "switch"
    //   - "case"
    //   - "default"
    //
    // The difference between "[" and "[\n" is that "[\n" is used when we are
    // treating "[" and "]" tokens as line delimiters and should increment and
    // decrement the indent level when we find them.
    var stack = [];

    // Acorn's tokenizer will always yield comments *before* the token they
    // follow (unless the very first thing in the source is a comment), so we
    // have to queue the comments in order to pretty print them in the correct
    // location. For example, the source file:
    //
    //     foo
    //     // a
    //     // b
    //     bar
    //
    // When tokenized by acorn, gives us the following token stream:
    //
    //     [ '// a', '// b', foo, bar ]
    var commentQueue = [];

    var getToken = acorn.tokenize(input, {
      locations: true,
      sourceFile: options.url,
      onComment: function (block, text, start, end, startLoc, endLoc) {
        if (lastToken) {
          commentQueue.push({
            block: block,
            text: text,
            line: startLoc.line,
            column: startLoc.column,
            trailing: lastToken.endLoc.line == startLoc.line
          });
        } else {
          addComment(write, indentLevel, options, block, text, startLoc.line,
                     startLoc.column);
          addedNewline = true;
        }
      }
    });

    while (true) {
      token = getToken();

      ttk = token.type.keyword;
      ttt = token.type.type;

      if (ttt == "eof") {
        if (!addedNewline) {
          write("\n");
        }
        break;
      }

      token.isArrayLiteral = isArrayLiteral(token, lastToken);

      if (belongsOnStack(token)) {
        if (token.isArrayLiteral) {
          stack.push("[\n");
        } else {
          stack.push(ttt || ttk);
        }
      }

      if (decrementsIndent(ttt, stack)) {
        indentLevel--;
        if (ttt == "}"
            && stack.length > 1
            && stack[stack.length - 2] == "switch") {
          indentLevel--;
        }
      }

      prependWhiteSpace(token, lastToken, addedNewline, write, options,
                        indentLevel, stack);
      addToken(token, write, options);
      if (commentQueue.length == 0 || !commentQueue[0].trailing) {
        addedNewline = appendNewline(token, write, stack);
      }

      if (shouldStackPop(token, stack)) {
        stack.pop();
        if (token == "}" && stack.length
            && stack[stack.length - 1] == "switch") {
          stack.pop();
        }
      }

      if (incrementsIndent(token)) {
        indentLevel++;
      }

      // Acorn's tokenizer re-uses tokens, so we have to copy the last token on
      // every iteration. We follow acorn's lead here, and reuse the lastToken
      // object the same way that acorn reuses the token object. This allows us
      // to avoid allocations and minimize GC pauses.
      if (!lastToken) {
        lastToken = { startLoc: {}, endLoc: {} };
      }
      lastToken.start = token.start;
      lastToken.end = token.end;
      lastToken.startLoc.line = token.startLoc.line;
      lastToken.startLoc.column = token.startLoc.column;
      lastToken.endLoc.line = token.endLoc.line;
      lastToken.endLoc.column = token.endLoc.column;
      lastToken.type = token.type;
      lastToken.value = token.value;
      lastToken.isArrayLiteral = token.isArrayLiteral;

      // Apply all the comments that have been queued up.
      if (commentQueue.length) {
        if (!addedNewline && !commentQueue[0].trailing) {
          write("\n");
        }
        if (commentQueue[0].trailing) {
          write(" ");
        }
        for (var i = 0, n = commentQueue.length; i < n; i++) {
          var comment = commentQueue[i];
          var commentIndentLevel = commentQueue[i].trailing ? 0 : indentLevel;
          addComment(write, commentIndentLevel, options, comment.block,
                     comment.text, comment.line, comment.column);
        }
        addedNewline = true;
        commentQueue.splice(0, commentQueue.length);
      }
    }

    return result.toStringWithSourceMap({ file: options.url });
  };

}.bind(this)));