зеркало из https://github.com/mozilla/pjs.git
Extension to the narcissus parser to handle Mozilla-specific extensions to JS (572014, r=pwalton).
This commit is contained in:
Родитель
9bc7d8a94f
Коммит
41be40172b
|
@ -72,7 +72,7 @@ var tokens = [
|
|||
// Nonterminal tree node type codes.
|
||||
"SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
|
||||
"ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
|
||||
"GROUP", "LIST",
|
||||
"GROUP", "LIST", "LET_STM", "LET_EXP", "LET_DEF",
|
||||
|
||||
// Terminals.
|
||||
"IDENTIFIER", "NUMBER", "STRING", "REGEXP",
|
||||
|
@ -84,11 +84,13 @@ var tokens = [
|
|||
"else", "enum",
|
||||
"false", "finally", "for", "function",
|
||||
"if", "in", "instanceof",
|
||||
"let",
|
||||
"new", "null",
|
||||
"return",
|
||||
"switch",
|
||||
"this", "throw", "true", "try", "typeof",
|
||||
"var", "void",
|
||||
"yield",
|
||||
"while", "with",
|
||||
];
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ for (var op in opTypeNames) {
|
|||
}
|
||||
}
|
||||
|
||||
// file ptr, path to file, line number -> Tokenizer
|
||||
function Tokenizer(s, f, l) {
|
||||
this.cursor = 0;
|
||||
this.source = String(s);
|
||||
|
@ -363,6 +364,9 @@ Tokenizer.prototype = {
|
|||
token.value = id;
|
||||
},
|
||||
|
||||
// void -> token type
|
||||
// It consumes input *only* if there is no lookahead.
|
||||
// Dispatch to the appropriate lexing function depending on the input.
|
||||
get: function () {
|
||||
var token;
|
||||
while (this.lookahead) {
|
||||
|
@ -415,6 +419,8 @@ Tokenizer.prototype = {
|
|||
return token.type;
|
||||
},
|
||||
|
||||
// void -> undefined
|
||||
// match depends on unget returning undefined.
|
||||
unget: function () {
|
||||
if (++this.lookahead == 4) throw "PANIC: too much lookahead!";
|
||||
this.tokenIndex = (this.tokenIndex - 1) & 3;
|
||||
|
|
|
@ -43,10 +43,15 @@
|
|||
* Parser.
|
||||
*/
|
||||
|
||||
// boolean -> undefined
// Constructor for the per-script parsing state handed to the parser functions.
//   inFunction - stored as given; used to check whether a return statement
//                appears in a valid context.
//   stmtStack  - its elements are used to find the target of CONTINUEs and
//                BREAKs; its length is used in function definitions.
//   funDecls   - starts empty; Script copies it onto the SCRIPT node.
//   varDecls   - accumulates declarations processed with the var keyword.
function CompilerContext(inFunction) {
    var listFields = ["stmtStack", "funDecls", "varDecls"];
    var k;

    this.inFunction = inFunction;
    // Every context gets its own fresh, empty array for each list field.
    for (k = 0; k < listFields.length; k++)
        this[listFields[k]] = [];
}
|
||||
|
||||
|
@ -59,11 +64,15 @@ CompilerContext.prototype = {
|
|||
inForLoopInit: false,
|
||||
};
|
||||
|
||||
// tokenizer, compiler context -> node
// Parses the toplevel and function bodies.
// Returns the node produced by Statements, retyped to SCRIPT, carrying:
//   funDecls - the context's funDecls array (shared, not copied)
//   varDecls - the block's own varDecls (if any) followed by the context's
function Script(t, x) {
    var n = Statements(t, x);
    n.type = SCRIPT;
    n.funDecls = x.funDecls;
    // LETs may already have added varDecls to this block, so keep those and
    // append the context's var declarations after them. n.varDecls must not
    // alias x.varDecls: pushing an array onto itself would duplicate every
    // declaration and mutate the context.
    n.varDecls = n.varDecls || [];
    Array.prototype.push.apply(n.varDecls, x.varDecls);
    return n;
}
|
||||
|
||||
|
@ -73,18 +82,21 @@ defineProperty(Array.prototype, "top",
|
|||
return this.length && this[this.length-1];
|
||||
}, false, false, true);
|
||||
|
||||
// tokenizer, optional type -> node
|
||||
function Node(t, type) {
|
||||
var token = t.token;
|
||||
if (token) {
|
||||
this.type = type || token.type;
|
||||
this.value = token.value;
|
||||
this.lineno = token.lineno;
|
||||
// start & end are file positions for error handling
|
||||
this.start = token.start;
|
||||
this.end = token.end;
|
||||
} else {
|
||||
this.type = type;
|
||||
this.lineno = t.lineno;
|
||||
}
|
||||
// nodes use a tokenizer for debugging (getSource, filename getter)
|
||||
this.tokenizer = t;
|
||||
|
||||
for (var i = 2; i < arguments.length; i++)
|
||||
|
@ -97,10 +109,12 @@ Np.toSource = Object.prototype.toSource;
|
|||
|
||||
// Always use push to add operands to an expression: besides appending the
// kid, it widens this node's start/end so that they cover the kid's range.
// Returns whatever Array.prototype.push returns (the new length).
Np.push = function (kid) {
    // Kids can be null, e.g. the hole in [1, , 2]. A hole has no position
    // information, so it is stored without touching start/end.
    if (kid === null)
        return Array.prototype.push.call(this, kid);

    if (kid.start < this.start) this.start = kid.start;
    if (this.end < kid.end) this.end = kid.end;
    return Array.prototype.push.call(this, kid);
};
|
||||
|
||||
|
@ -154,6 +168,8 @@ function nest(t, x, node, func, end) {
|
|||
return n;
|
||||
}
|
||||
|
||||
// tokenizer, compiler context -> node
|
||||
// parses a list of Statements
|
||||
function Statements(t, x) {
|
||||
var n = new Node(t, BLOCK);
|
||||
x.stmtStack.push(n);
|
||||
|
@ -172,13 +188,28 @@ function Block(t, x) {
|
|||
|
||||
const DECLARED_FORM = 0, EXPRESSED_FORM = 1, STATEMENT_FORM = 2;
|
||||
|
||||
// tokenizer, compiler context -> node
|
||||
// parses a Statement
|
||||
function Statement(t, x) {
|
||||
var i, label, n, n2, ss, tt = t.get();
|
||||
|
||||
// Cases for statements ending in a right curly return early, avoiding the
|
||||
// common semicolon insertion magic after this switch.
|
||||
switch (tt) {
|
||||
case LET:
|
||||
n = LetForm(t, x, STATEMENT_FORM);
|
||||
if (n.type === LET_STM)
|
||||
return n;
|
||||
if (n.type === LET_EXP) {// exps in stm context are semi nodes
|
||||
n2 = new Node(t, SEMICOLON);
|
||||
n2.expression = n;
|
||||
n = n2;
|
||||
n.end = n.expression.end;
|
||||
}
|
||||
break;
|
||||
|
||||
case FUNCTION:
|
||||
// DECLARED_FORM extends the function definitions of x; STATEMENT_FORM doesn't.
|
||||
return FunctionDefinition(t, x, true,
|
||||
(x.stmtStack.length > 1)
|
||||
? STATEMENT_FORM
|
||||
|
@ -199,10 +230,10 @@ function Statement(t, x) {
|
|||
return n;
|
||||
|
||||
case SWITCH:
|
||||
// This allows CASEs after a DEFAULT, which is in the standard.
|
||||
n = new Node(t);
|
||||
t.mustMatch(LEFT_PAREN);
|
||||
n.discriminant = Expression(t, x);
|
||||
t.mustMatch(RIGHT_PAREN);
|
||||
|
||||
n.discriminant = ParenExpression(t, x);
|
||||
n.cases = [];
|
||||
n.defaultIndex = -1;
|
||||
x.stmtStack.push(n);
|
||||
|
@ -235,34 +266,51 @@ function Statement(t, x) {
|
|||
case FOR:
|
||||
n = new Node(t);
|
||||
n.isLoop = true;
|
||||
if (t.match(IDENTIFIER)) {
|
||||
if (t.token.value !== "each")
|
||||
throw t.newSyntaxError("Illegal identifier after for");
|
||||
else
|
||||
n.foreach = true;
|
||||
}
|
||||
t.mustMatch(LEFT_PAREN);
|
||||
if ((tt = t.peek()) != SEMICOLON) {
|
||||
x.inForLoopInit = true;
|
||||
if (tt == VAR || tt == CONST) {
|
||||
switch (tt) {
|
||||
case VAR: case CONST:
|
||||
t.get();
|
||||
n2 = Variables(t, x);
|
||||
} else {
|
||||
break;
|
||||
case LET:
|
||||
t.get();
|
||||
n2 = Variables(t, x, "local decls");
|
||||
// don't confuse w/ n.varDecl used by for/in.
|
||||
n.varDecls = [];
|
||||
for (var i = 0, len = n2.length, vdecls = n.varDecls; i < len; i++)
|
||||
vdecls.push(n2[i]);
|
||||
break;
|
||||
default:
|
||||
n2 = Expression(t, x);
|
||||
break;
|
||||
}
|
||||
x.inForLoopInit = false;
|
||||
}
|
||||
if (n2 && t.match(IN)) {
|
||||
if (n2 && t.match(IN)) { // for...in
|
||||
var n2t = n2.type,
|
||||
se = t.newSyntaxError("Invalid for..in left-hand side");
|
||||
n.type = FOR_IN;
|
||||
if (n2.type == VAR) {
|
||||
if (n2.length != 1) {
|
||||
throw new SyntaxError("Invalid for..in left-hand side",
|
||||
t.filename, n2.lineno);
|
||||
}
|
||||
|
||||
// NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
|
||||
if (n2t === VAR || n2t === LET) {
|
||||
if (n2.length != 1) throw se;
|
||||
n.iterator = n2[0];
|
||||
n.varDecl = n2;
|
||||
} else if (n2t !== IDENTIFIER) {
|
||||
throw se;
|
||||
} else {
|
||||
n.iterator = n2;
|
||||
n.varDecl = null;
|
||||
}
|
||||
n.object = Expression(t, x);
|
||||
} else {
|
||||
} else { // classic for
|
||||
if (n.foreach) throw t.newSyntaxError("Illegal for-each syntax");
|
||||
n.setup = n2 || null;
|
||||
t.mustMatch(SEMICOLON);
|
||||
n.condition = (t.peek() == SEMICOLON) ? null : Expression(t, x);
|
||||
|
@ -310,8 +358,7 @@ function Statement(t, x) {
|
|||
throw t.newSyntaxError("Label not found");
|
||||
} while (ss[i].label != label);
|
||||
|
||||
/*
|
||||
* Both break and continue to label need to be handled specially
|
||||
/* Both break and continue to label need to be handled specially
|
||||
* within a labeled loop, so that they target that loop. If not in
|
||||
* a loop, then break targets its labeled statement. Labels can be
|
||||
* nested so we skip all labels immediately enclosing the nearest
|
||||
|
@ -332,7 +379,7 @@ function Statement(t, x) {
|
|||
}
|
||||
} while (!ss[i].isLoop && !(tt == BREAK && ss[i].type == SWITCH));
|
||||
}
|
||||
n.target = ss[i];
|
||||
n.target = ss[i]; // cycle in the AST
|
||||
break;
|
||||
|
||||
case TRY:
|
||||
|
@ -386,7 +433,8 @@ function Statement(t, x) {
|
|||
n.body = nest(t, x, n, Statement);
|
||||
return n;
|
||||
|
||||
case VAR:
|
||||
|
||||
case VAR: // for variable declarations using the VAR and CONST keywords.
|
||||
case CONST:
|
||||
n = Variables(t, x);
|
||||
break;
|
||||
|
@ -406,6 +454,7 @@ function Statement(t, x) {
|
|||
t.scanOperand = false;
|
||||
tt = t.peek();
|
||||
t.scanOperand = true;
|
||||
// labeled statement
|
||||
if (tt == COLON) {
|
||||
label = t.token.value;
|
||||
ss = x.stmtStack;
|
||||
|
@ -420,7 +469,8 @@ function Statement(t, x) {
|
|||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
// expression statement.
|
||||
// We unget the current token to parse the expr as a whole.
|
||||
n = new Node(t, SEMICOLON);
|
||||
t.unget();
|
||||
n.expression = Expression(t, x);
|
||||
|
@ -428,6 +478,7 @@ function Statement(t, x) {
|
|||
break;
|
||||
}
|
||||
|
||||
// semicolon-insertion magic
|
||||
if (t.lineno == t.token.lineno) {
|
||||
tt = t.peekOnSameLine();
|
||||
if (tt != END && tt != NEWLINE && tt != SEMICOLON && tt != RIGHT_CURLY)
|
||||
|
@ -437,6 +488,8 @@ function Statement(t, x) {
|
|||
return n;
|
||||
}
|
||||
|
||||
// tokenizer, compiler context, boolean,
|
||||
// DECLARED_FORM or EXPRESSED_FORM or STATEMENT_FORM -> node
|
||||
function FunctionDefinition(t, x, requireName, functionForm) {
|
||||
var f = new Node(t);
|
||||
if (f.type != FUNCTION)
|
||||
|
@ -457,10 +510,13 @@ function FunctionDefinition(t, x, requireName, functionForm) {
|
|||
t.mustMatch(COMMA);
|
||||
}
|
||||
|
||||
t.mustMatch(LEFT_CURLY);
|
||||
if (t.match(LEFT_CURLY)) {
|
||||
var x2 = new CompilerContext(true);
|
||||
f.body = Script(t, x2);
|
||||
t.mustMatch(RIGHT_CURLY);
|
||||
} else { /* Expression closures (1.8) */
|
||||
f.body = Expression(t, x, COMMA);
|
||||
}
|
||||
f.end = t.token.end;
|
||||
|
||||
f.functionForm = functionForm;
|
||||
|
@ -469,12 +525,19 @@ function FunctionDefinition(t, x, requireName, functionForm) {
|
|||
return f;
|
||||
}
|
||||
|
||||
// tokenizer, compiler context -> node
|
||||
// parses a comma-separated list of var decls (and maybe initializations)
|
||||
function Variables(t, x) {
|
||||
var n = new Node(t);
|
||||
var n = new Node(t), tt, n2;
|
||||
do {
|
||||
tt = t.peek();
|
||||
if (tt === LEFT_CURLY || tt === LEFT_BRACKET) {
|
||||
n2 = Expression(t, x); // for destructuring
|
||||
} else {
|
||||
t.mustMatch(IDENTIFIER);
|
||||
var n2 = new Node(t);
|
||||
n2 = new Node(t);
|
||||
n2.name = n2.value;
|
||||
}
|
||||
if (t.match(ASSIGN)) {
|
||||
if (t.token.assignOp)
|
||||
throw t.newSyntaxError("Invalid variable initialization");
|
||||
|
@ -482,11 +545,52 @@ function Variables(t, x) {
|
|||
}
|
||||
n2.readOnly = (n.type == CONST);
|
||||
n.push(n2);
|
||||
x.varDecls.push(n2);
|
||||
// LETs use "local decls"
|
||||
if (arguments[2] !== "local decls") x.varDecls.push(n2);
|
||||
} while (t.match(COMMA));
|
||||
return n;
|
||||
}
|
||||
|
||||
// tokenizer, comp. context, EXPRESSED_FORM or STATEMENT_FORM -> node
// Parses what follows a LET token (the current token when this is called):
//   let (decls) { ... }  -> LET_STM, only when form is STATEMENT_FORM
//   let (decls) expr     -> LET_EXP
//   let decls            -> LET_DEF, rejected when form is EXPRESSED_FORM
// Every returned node has varDecls; LET_STM and LET_EXP nodes also have body.
// Throws t.newSyntaxError(...) when a let definition is used as an expression.
// Doesn't handle lets in the toplevel of forloop heads.
function LetForm(t, x, form) {
    var i, n, n2, s, ss, hasLeftParen;

    n = new Node(t);
    hasLeftParen = t.match(LEFT_PAREN);
    // "local decls" stops Variables from adding these to x.varDecls.
    n2 = Variables(t, x, "local decls");
    if (hasLeftParen) { // let statement and let expression
        t.mustMatch(RIGHT_PAREN);
        n.varDecls = [];
        for (i = 0; i < n2.length; i++)
            n.varDecls.push(n2[i]);
        // A let statement's body is a block, so the token after the closing
        // paren must be the *opening* curly. (Block presumably consumes it;
        // its body is not visible here.)
        if (form === STATEMENT_FORM && t.peek() === LEFT_CURLY) {
            n.type = LET_STM;
            n.body = nest(t, x, n, Block);
        } else {
            n.type = LET_EXP;
            n.body = Expression(t, x, COMMA);
        }
    } else if (form === EXPRESSED_FORM) {
        throw t.newSyntaxError("Let-definition used as expression.");
    } else { // let definition
        n.type = LET_DEF;
        // Search the context to find the enclosing BLOCK.
        ss = x.stmtStack;
        i = ss.length;
        while (ss[--i].type !== BLOCK) ; // a BLOCK *must* be found.
        s = ss[i];
        s.varDecls = s.varDecls || [];
        n.varDecls = [];
        for (i = 0; i < n2.length; i++) {
            s.varDecls.push(n2[i]); // the vars must go in the correct scope
            n.varDecls.push(n2[i]); // but the assignments must stay here
        }
    }
    return n;
}
|
||||
|
||||
// tokenizer, compiler context -> node
|
||||
function ParenExpression(t, x) {
|
||||
t.mustMatch(LEFT_PAREN);
|
||||
var n = Expression(t, x);
|
||||
|
@ -545,11 +649,18 @@ var opArity = {
|
|||
for (i in opArity)
|
||||
opArity[tokenIds[i]] = opArity[i];
|
||||
|
||||
// tokenizer, compiler context, optional COMMA or COLON -> node
|
||||
// When scanOperand is true the parser wants an operand (the "default" mode).
|
||||
// When it's false, the parser is expecting an operator.
|
||||
function Expression(t, x, stop) {
|
||||
var n, id, tt, operators = [], operands = [];
|
||||
var bl = x.bracketLevel, cl = x.curlyLevel, pl = x.parenLevel,
|
||||
hl = x.hookLevel;
|
||||
|
||||
// void -> node
|
||||
// Uses an operator and its operands to construct a whole expression.
|
||||
// The result of reduce isn't used by its callers. It's left on the operands
|
||||
// stack and it's retrieved from there.
|
||||
function reduce() {
|
||||
var n = operators.pop();
|
||||
var op = n.type;
|
||||
|
@ -578,7 +689,12 @@ function Expression(t, x, stop) {
|
|||
return n;
|
||||
}
|
||||
|
||||
loop:
|
||||
// If we are expecting an operator and find something else, it may not be an error,
|
||||
// because of semicolon insertion. So Expression doesn't throw for this.
|
||||
// If it turns out to be an error it is detected by various other parts of
|
||||
// the code and the msg may be obscure.
|
||||
|
||||
loop: // tt stands for token type
|
||||
while ((tt = t.get()) != END) {
|
||||
if (tt == stop &&
|
||||
x.bracketLevel == bl && x.curlyLevel == cl && x.parenLevel == pl &&
|
||||
|
@ -592,12 +708,24 @@ loop:
|
|||
// NB: cannot be empty, Statement handled that.
|
||||
break loop;
|
||||
|
||||
case LET: //parse let expressions
|
||||
//LET is not an operator, no need to assign precedence to it.
|
||||
if (!t.scanOperand) break loop;
|
||||
operands.push(LetForm(t, x, EXPRESSED_FORM));
|
||||
t.scanOperand = false;
|
||||
break;
|
||||
|
||||
case ASSIGN:
|
||||
//the parser doesn't check that the lhs of an assignment is legal,
|
||||
//so it unintentionally allows destructuring here.
|
||||
//FIXME: report illegal left-hand sides in assignments.
|
||||
case HOOK:
|
||||
case COLON:
|
||||
if (t.scanOperand)
|
||||
break loop;
|
||||
|
||||
// Use >, not >=, for right-associative ASSIGN and HOOK/COLON.
|
||||
// if operators is empty, operators.top().type is undefined.
|
||||
while (opPrecedence[operators.top().type] > opPrecedence[tt] ||
|
||||
(tt == COLON && operators.top().type == ASSIGN)) {
|
||||
reduce();
|
||||
|
@ -655,11 +783,14 @@ loop:
|
|||
}
|
||||
break;
|
||||
|
||||
case YIELD:
|
||||
if (!x.inFunction) throw t.newSyntaxError("yield not in function");
|
||||
// fall thru
|
||||
|
||||
case DELETE: case VOID: case TYPEOF:
|
||||
case NOT: case BITWISE_NOT: case UNARY_PLUS: case UNARY_MINUS:
|
||||
case NEW:
|
||||
if (!t.scanOperand)
|
||||
break loop;
|
||||
if (!t.scanOperand) break loop;
|
||||
operators.push(new Node(t));
|
||||
break;
|
||||
|
||||
|
@ -683,16 +814,20 @@ loop:
|
|||
break;
|
||||
|
||||
case FUNCTION:
|
||||
if (!t.scanOperand)
|
||||
break loop;
|
||||
if (!t.scanOperand) break loop;
|
||||
operands.push(FunctionDefinition(t, x, false, EXPRESSED_FORM));
|
||||
t.scanOperand = false;
|
||||
break;
|
||||
|
||||
case NULL: case THIS: case TRUE: case FALSE:
|
||||
case IDENTIFIER: case NUMBER: case STRING: case REGEXP:
|
||||
if (!t.scanOperand)
|
||||
break loop;
|
||||
case NULL:
|
||||
case THIS:
|
||||
case TRUE:
|
||||
case FALSE:
|
||||
case IDENTIFIER:
|
||||
case NUMBER:
|
||||
case STRING:
|
||||
case REGEXP:
|
||||
if (!t.scanOperand) break loop;
|
||||
operands.push(new Node(t));
|
||||
t.scanOperand = false;
|
||||
break;
|
||||
|
@ -701,17 +836,44 @@ loop:
|
|||
if (t.scanOperand) {
|
||||
// Array initialiser. Parse using recursive descent, as the
|
||||
// sub-grammar here is not an operator grammar.
|
||||
var fi, iter, elms, x2;
|
||||
n = new Node(t, ARRAY_INIT);
|
||||
elms = 0
|
||||
while ((tt = t.peek()) != RIGHT_BRACKET) {
|
||||
elms++;
|
||||
if (tt == COMMA) {
|
||||
t.get();
|
||||
n.push(null);
|
||||
continue;
|
||||
}
|
||||
n.push(Expression(t, x, COMMA));
|
||||
if (!t.match(COMMA))
|
||||
if (t.match(FOR)) { // array comprehensions
|
||||
if (elms !== 1)
|
||||
throw t.newSyntaxError("Invalid comprehension");
|
||||
fi = new Node(t, FOR_IN);
|
||||
if (t.match(IDENTIFIER)) {
|
||||
if (t.token.value !== "each")
|
||||
throw t.newSyntaxError("Invalid comprehension");
|
||||
else
|
||||
n.foreach = true;
|
||||
}
|
||||
t.mustMatch(LEFT_PAREN);
|
||||
// x.inForLoopInit = true; won't work because this FOR
|
||||
// may be inside another expression => parenLevel !== 0
|
||||
x2 = new CompilerContext(x.inFunction);
|
||||
x2.inForLoopInit = true;
|
||||
iter = Expression(t, x2);
|
||||
if (iter.type !== IDENTIFIER)
|
||||
throw t.newSyntaxError("Invalid comprehension");
|
||||
fi.iterator = iter;
|
||||
t.mustMatch(IN);
|
||||
fi.object = Expression(t, x);
|
||||
t.mustMatch(RIGHT_PAREN);
|
||||
if (t.match(IF)) fi.condition = Expression(t, x);
|
||||
break;
|
||||
}
|
||||
if (!t.match(COMMA)) break;
|
||||
}
|
||||
t.mustMatch(RIGHT_BRACKET);
|
||||
operands.push(n);
|
||||
t.scanOperand = false;
|
||||
|
@ -732,8 +894,7 @@ loop:
|
|||
break;
|
||||
|
||||
case LEFT_CURLY:
|
||||
if (!t.scanOperand)
|
||||
break loop;
|
||||
if (!t.scanOperand) break loop;
|
||||
// Object initialiser. As for array initialisers (see above),
|
||||
// parse using recursive descent.
|
||||
++x.curlyLevel;
|
||||
|
@ -852,6 +1013,7 @@ loop:
|
|||
return operands.pop();
|
||||
}
|
||||
|
||||
// file ptr, path to file, line number -> node
|
||||
function parse(s, f, l) {
|
||||
var t = new Tokenizer(s, f, l);
|
||||
var x = new CompilerContext(false);
|
||||
|
@ -860,3 +1022,4 @@ function parse(s, f, l) {
|
|||
throw t.newSyntaxError("Syntax error");
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче