More scanner optimization and fixes
* Don't allocate substrings to match keywords.
* Make at most one pass over string to get its unquoted/unescaped/unindented value. (**)
* Add test coverage for impacted code paths.
* Fix issue where the string value of an unterminated string was missing its final character(s).

(**) Actually, we still make two passes in the case of a non-triple-quoted, multi-line string with \r\n, but that is about to be removed by the next commit, which will disallow non-triple-quoted, multi-line strings altogether.
This commit is contained in:
Родитель
158dd31eb2
Коммит
44faaadf01
|
@ -144,11 +144,15 @@ export const enum CharCode {
|
||||||
Tilde = 0x7e,
|
Tilde = 0x7e,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function utf16CodeUnits(codePoint: number) {
|
||||||
|
return codePoint >= 0x10000 ? 2 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
export function isAsciiLineBreak(ch: number) {
|
export function isAsciiLineBreak(ch: number) {
|
||||||
return ch === CharCode.LineFeed || ch == CharCode.CarriageReturn;
|
return ch === CharCode.LineFeed || ch == CharCode.CarriageReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiWhiteSpaceSingleLine(ch: number): boolean {
|
export function isAsciiWhiteSpaceSingleLine(ch: number) {
|
||||||
return (
|
return (
|
||||||
ch === CharCode.Space ||
|
ch === CharCode.Space ||
|
||||||
ch === CharCode.Tab ||
|
ch === CharCode.Tab ||
|
||||||
|
@ -186,25 +190,29 @@ export function isWhiteSpaceSingleLine(ch: number) {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isLineBreak(ch: number): boolean {
|
export function isLineBreak(ch: number) {
|
||||||
return isAsciiLineBreak(ch) || (ch > CharCode.MaxAscii && isNonAsciiLineBreak(ch));
|
return isAsciiLineBreak(ch) || (ch > CharCode.MaxAscii && isNonAsciiLineBreak(ch));
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isDigit(ch: number): boolean {
|
export function isDigit(ch: number) {
|
||||||
return ch >= CharCode._0 && ch <= CharCode._9;
|
return ch >= CharCode._0 && ch <= CharCode._9;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isHexDigit(ch: number): boolean {
|
export function isHexDigit(ch: number) {
|
||||||
return (
|
return (
|
||||||
isDigit(ch) || (ch >= CharCode.A && ch <= CharCode.F) || (ch >= CharCode.a && ch <= CharCode.f)
|
isDigit(ch) || (ch >= CharCode.A && ch <= CharCode.F) || (ch >= CharCode.a && ch <= CharCode.f)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isBinaryDigit(ch: number): boolean {
|
export function isBinaryDigit(ch: number) {
|
||||||
return ch === CharCode._0 || ch === CharCode._1;
|
return ch === CharCode._0 || ch === CharCode._1;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiIdentifierStart(ch: number): boolean {
|
export function isLowercaseAsciiLetter(ch: number) {
|
||||||
|
return ch >= CharCode.a && ch <= CharCode.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isAsciiIdentifierStart(ch: number) {
|
||||||
return (
|
return (
|
||||||
(ch >= CharCode.A && ch <= CharCode.Z) ||
|
(ch >= CharCode.A && ch <= CharCode.Z) ||
|
||||||
(ch >= CharCode.a && ch <= CharCode.z) ||
|
(ch >= CharCode.a && ch <= CharCode.z) ||
|
||||||
|
@ -213,7 +221,7 @@ export function isAsciiIdentifierStart(ch: number): boolean {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isAsciiIdentifierContinue(ch: number): boolean {
|
export function isAsciiIdentifierContinue(ch: number) {
|
||||||
return (
|
return (
|
||||||
(ch >= CharCode.A && ch <= CharCode.Z) ||
|
(ch >= CharCode.A && ch <= CharCode.Z) ||
|
||||||
(ch >= CharCode.a && ch <= CharCode.z) ||
|
(ch >= CharCode.a && ch <= CharCode.z) ||
|
||||||
|
@ -245,7 +253,7 @@ export function isNonAsciiIdentifierContinue(codePoint: number) {
|
||||||
return lookupInNonAsciiMap(codePoint, nonAsciiIdentifierContinueMap);
|
return lookupInNonAsciiMap(codePoint, nonAsciiIdentifierContinueMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
function lookupInNonAsciiMap(codePoint: number, map: readonly number[]): boolean {
|
function lookupInNonAsciiMap(codePoint: number, map: readonly number[]) {
|
||||||
// Bail out quickly if it couldn't possibly be in the map.
|
// Bail out quickly if it couldn't possibly be in the map.
|
||||||
if (codePoint < map[0]) {
|
if (codePoint < map[0]) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -7,11 +7,13 @@ import {
|
||||||
isHexDigit,
|
isHexDigit,
|
||||||
isIdentifierContinue,
|
isIdentifierContinue,
|
||||||
isLineBreak,
|
isLineBreak,
|
||||||
|
isLowercaseAsciiLetter,
|
||||||
isNonAsciiIdentifierContinue,
|
isNonAsciiIdentifierContinue,
|
||||||
isNonAsciiIdentifierStart,
|
isNonAsciiIdentifierStart,
|
||||||
isNonAsciiLineBreak,
|
isNonAsciiLineBreak,
|
||||||
isNonAsciiWhiteSpaceSingleLine,
|
isNonAsciiWhiteSpaceSingleLine,
|
||||||
isWhiteSpaceSingleLine,
|
isWhiteSpaceSingleLine,
|
||||||
|
utf16CodeUnits,
|
||||||
} from "./charcode.js";
|
} from "./charcode.js";
|
||||||
import { createSourceFile, Message, throwOnError } from "./diagnostics.js";
|
import { createSourceFile, Message, throwOnError } from "./diagnostics.js";
|
||||||
import { SourceFile } from "./types.js";
|
import { SourceFile } from "./types.js";
|
||||||
|
@ -91,16 +93,16 @@ const MaxStatementKeyword = Token.AliasKeyword;
|
||||||
|
|
||||||
/** @internal */
|
/** @internal */
|
||||||
export const TokenDisplay: readonly string[] = [
|
export const TokenDisplay: readonly string[] = [
|
||||||
"<none>",
|
"none",
|
||||||
"<invalid>",
|
"invalid",
|
||||||
"<end of file>",
|
"end of file",
|
||||||
"<single-line comment>",
|
"single-line comment",
|
||||||
"<multi-line comment>",
|
"multi-line comment",
|
||||||
"<newline>",
|
"newline",
|
||||||
"<whitespace>",
|
"whitespace",
|
||||||
"<conflict marker>",
|
"conflict marker",
|
||||||
"<numeric literal>",
|
"numeric literal",
|
||||||
"<string literal>",
|
"string literal",
|
||||||
"'{'",
|
"'{'",
|
||||||
"'}'",
|
"'}'",
|
||||||
"'('",
|
"'('",
|
||||||
|
@ -119,7 +121,7 @@ export const TokenDisplay: readonly string[] = [
|
||||||
"'?'",
|
"'?'",
|
||||||
"':'",
|
"':'",
|
||||||
"'@'",
|
"'@'",
|
||||||
"<identifier>",
|
"identifier",
|
||||||
"'import'",
|
"'import'",
|
||||||
"'model'",
|
"'model'",
|
||||||
"'namespace'",
|
"'namespace'",
|
||||||
|
@ -133,7 +135,7 @@ export const TokenDisplay: readonly string[] = [
|
||||||
];
|
];
|
||||||
|
|
||||||
/** @internal */
|
/** @internal */
|
||||||
export const Keywords: ReadonlyMap<string, Token> = new Map([
|
export const Keywords: readonly [string, Token][] = [
|
||||||
["import", Token.ImportKeyword],
|
["import", Token.ImportKeyword],
|
||||||
["model", Token.ModelKeyword],
|
["model", Token.ModelKeyword],
|
||||||
["namespace", Token.NamespaceKeyword],
|
["namespace", Token.NamespaceKeyword],
|
||||||
|
@ -144,14 +146,30 @@ export const Keywords: ReadonlyMap<string, Token> = new Map([
|
||||||
["alias", Token.AliasKeyword],
|
["alias", Token.AliasKeyword],
|
||||||
["true", Token.TrueKeyword],
|
["true", Token.TrueKeyword],
|
||||||
["false", Token.FalseKeyword],
|
["false", Token.FalseKeyword],
|
||||||
]);
|
];
|
||||||
|
|
||||||
/** @internal */
|
/** @internal */
|
||||||
export const enum KeywordLimit {
|
export const enum KeywordLimit {
|
||||||
MinLength = 2,
|
MinLength = 2,
|
||||||
|
// If this ever exceeds 10, we will overflow the keyword map key, needing 11*5
|
||||||
|
// = 55 bits or more, exceeding the JavaScript safe integer range. We would
|
||||||
|
// have to change the keyword lookup algorithm in that case.
|
||||||
MaxLength = 9,
|
MaxLength = 9,
|
||||||
MinStartChar = CharCode.a,
|
}
|
||||||
MaxStartChar = CharCode.u,
|
|
||||||
|
const KeywordMap: ReadonlyMap<number, Token> = new Map(
|
||||||
|
Keywords.map((e) => [keywordKey(e[0]), e[1]])
|
||||||
|
);
|
||||||
|
|
||||||
|
// Since keywords are short and all lowercase, we can pack the whole string into
|
||||||
|
// a single number by using 5 bits for each letter, and use that as the map key.
|
||||||
|
// This lets us lookup keywords without making temporary substrings.
|
||||||
|
function keywordKey(keyword: string) {
|
||||||
|
let key = 0;
|
||||||
|
for (let i = 0; i < keyword.length; i++) {
|
||||||
|
key = (key << 5) | (keyword.charCodeAt(i) - CharCode.a);
|
||||||
|
}
|
||||||
|
return key;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Scanner {
|
export interface Scanner {
|
||||||
|
@ -190,6 +208,7 @@ const enum TokenFlags {
|
||||||
HasCrlf = 1 << 0,
|
HasCrlf = 1 << 0,
|
||||||
Escaped = 1 << 1,
|
Escaped = 1 << 1,
|
||||||
TripleQuoted = 1 << 2,
|
TripleQuoted = 1 << 2,
|
||||||
|
Unterminated = 1 << 3,
|
||||||
}
|
}
|
||||||
|
|
||||||
export function isLiteral(token: Token) {
|
export function isLiteral(token: Token) {
|
||||||
|
@ -226,9 +245,8 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
const file = typeof source === "string" ? createSourceFile(source, "<anonymous file>") : source;
|
const file = typeof source === "string" ? createSourceFile(source, "<anonymous file>") : source;
|
||||||
const input = file.text;
|
const input = file.text;
|
||||||
let position = 0;
|
let position = 0;
|
||||||
let token = Token.Invalid;
|
let token = Token.None;
|
||||||
let tokenPosition = -1;
|
let tokenPosition = -1;
|
||||||
let tokenValue: string | undefined = undefined;
|
|
||||||
let tokenFlags = TokenFlags.None;
|
let tokenFlags = TokenFlags.None;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -252,26 +270,20 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
return position >= input.length;
|
return position >= input.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
function next(t: Token, count = 1) {
|
|
||||||
position += count;
|
|
||||||
return (token = t);
|
|
||||||
}
|
|
||||||
|
|
||||||
function utf16CodeUnits(codePoint: number) {
|
|
||||||
return codePoint >= 0x10000 ? 2 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getTokenText() {
|
function getTokenText() {
|
||||||
return input.substring(tokenPosition, position);
|
return input.substring(tokenPosition, position);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getTokenValue() {
|
||||||
|
return token === Token.StringLiteral ? getStringTokenValue() : getTokenText();
|
||||||
|
}
|
||||||
|
|
||||||
function lookAhead(offset: number) {
|
function lookAhead(offset: number) {
|
||||||
return input.charCodeAt(position + offset);
|
return input.charCodeAt(position + offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scan(): Token {
|
function scan(): Token {
|
||||||
tokenPosition = position;
|
tokenPosition = position;
|
||||||
tokenValue = undefined;
|
|
||||||
tokenFlags = TokenFlags.None;
|
tokenFlags = TokenFlags.None;
|
||||||
|
|
||||||
if (!eof()) {
|
if (!eof()) {
|
||||||
|
@ -390,10 +402,14 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
: scanString();
|
: scanString();
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (isAsciiIdentifierStart(ch)) {
|
if (isLowercaseAsciiLetter(ch)) {
|
||||||
return scanIdentifierOrKeyword();
|
return scanIdentifierOrKeyword();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isAsciiIdentifierStart(ch)) {
|
||||||
|
return scanIdentifier();
|
||||||
|
}
|
||||||
|
|
||||||
if (ch <= CharCode.MaxAscii) {
|
if (ch <= CharCode.MaxAscii) {
|
||||||
return scanInvalidCharacter();
|
return scanInvalidCharacter();
|
||||||
}
|
}
|
||||||
|
@ -405,6 +421,17 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
return (token = Token.EndOfFile);
|
return (token = Token.EndOfFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function next(t: Token, count = 1) {
|
||||||
|
position += count;
|
||||||
|
return (token = t);
|
||||||
|
}
|
||||||
|
|
||||||
|
function unterminated(t: Token) {
|
||||||
|
tokenFlags |= TokenFlags.Unterminated;
|
||||||
|
error(Message.Unterminated, [TokenDisplay[t]]);
|
||||||
|
return (token = t);
|
||||||
|
}
|
||||||
|
|
||||||
function scanNonAsciiToken() {
|
function scanNonAsciiToken() {
|
||||||
const ch = input.charCodeAt(position);
|
const ch = input.charCodeAt(position);
|
||||||
|
|
||||||
|
@ -416,9 +443,9 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
return scanWhitespace();
|
return scanWhitespace();
|
||||||
}
|
}
|
||||||
|
|
||||||
const codePoint = input.codePointAt(position)!;
|
let cp = input.codePointAt(position)!;
|
||||||
if (isNonAsciiIdentifierStart(codePoint)) {
|
if (isNonAsciiIdentifierStart(cp)) {
|
||||||
return scanNonAsciiIdentifierContinue(codePoint);
|
return scanNonAsciiIdentifier(cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
return scanInvalidCharacter();
|
return scanInvalidCharacter();
|
||||||
|
@ -527,11 +554,10 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
function scanSingleLineComment() {
|
function scanSingleLineComment() {
|
||||||
position += 2; // consume '//'
|
position += 2; // consume '//'
|
||||||
|
|
||||||
while (!eof()) {
|
for (; !eof(); position++) {
|
||||||
if (isLineBreak(input.charCodeAt(position))) {
|
if (isLineBreak(input.charCodeAt(position))) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
position++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (token = Token.SingleLineComment);
|
return (token = Token.SingleLineComment);
|
||||||
|
@ -540,22 +566,20 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
function scanMultiLineComment() {
|
function scanMultiLineComment() {
|
||||||
position += 2; // consume '/*'
|
position += 2; // consume '/*'
|
||||||
|
|
||||||
while (!eof()) {
|
for (; !eof(); position++) {
|
||||||
if (input.charCodeAt(position) === CharCode.Asterisk && lookAhead(1) === CharCode.Slash) {
|
if (input.charCodeAt(position) === CharCode.Asterisk && lookAhead(1) === CharCode.Slash) {
|
||||||
position += 2;
|
position += 2;
|
||||||
return (token = Token.MultiLineComment);
|
return (token = Token.MultiLineComment);
|
||||||
}
|
}
|
||||||
position++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
error(Message.Unterminated, ["comment"]);
|
return unterminated(Token.MultiLineComment);
|
||||||
return (token = Token.MultiLineComment);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function scanString() {
|
function scanString() {
|
||||||
position++; // consume '"'
|
position++; // consume '"'
|
||||||
|
|
||||||
loop: while (!eof()) {
|
loop: for (; !eof(); position++) {
|
||||||
const ch = input.charCodeAt(position);
|
const ch = input.charCodeAt(position);
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
case CharCode.CarriageReturn:
|
case CharCode.CarriageReturn:
|
||||||
|
@ -570,150 +594,157 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
if (eof()) {
|
if (eof()) {
|
||||||
break loop;
|
break loop;
|
||||||
}
|
}
|
||||||
break;
|
continue;
|
||||||
case CharCode.DoubleQuote:
|
case CharCode.DoubleQuote:
|
||||||
position++;
|
position++;
|
||||||
return (token = Token.StringLiteral);
|
return (token = Token.StringLiteral);
|
||||||
}
|
}
|
||||||
position++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
error(Message.Unterminated, ["string literal"]);
|
return unterminated(Token.StringLiteral);
|
||||||
return (token = Token.StringLiteral);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function scanTripleQuotedString() {
|
function scanTripleQuotedString() {
|
||||||
tokenFlags |= TokenFlags.TripleQuoted;
|
tokenFlags |= TokenFlags.TripleQuoted;
|
||||||
position += 3; // consume '"""'
|
position += 3; // consume '"""'
|
||||||
|
|
||||||
loop: while (!eof()) {
|
for (; !eof(); position++) {
|
||||||
const ch = input.charCodeAt(position);
|
if (
|
||||||
switch (ch) {
|
input.charCodeAt(position) === CharCode.DoubleQuote &&
|
||||||
case CharCode.CarriageReturn:
|
lookAhead(1) === CharCode.DoubleQuote &&
|
||||||
if (lookAhead(1) === CharCode.LineFeed) {
|
lookAhead(2) === CharCode.DoubleQuote
|
||||||
tokenFlags |= TokenFlags.HasCrlf;
|
) {
|
||||||
position++;
|
position += 3;
|
||||||
}
|
return (token = Token.StringLiteral);
|
||||||
break;
|
|
||||||
case CharCode.Backslash:
|
|
||||||
tokenFlags |= TokenFlags.Escaped;
|
|
||||||
position++;
|
|
||||||
if (eof()) {
|
|
||||||
break loop;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CharCode.DoubleQuote:
|
|
||||||
if (lookAhead(1) === CharCode.DoubleQuote && lookAhead(2) === CharCode.DoubleQuote) {
|
|
||||||
position += 3;
|
|
||||||
return (token = Token.StringLiteral);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
position++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
error(Message.Unterminated, ["string literal"]);
|
return unterminated(Token.StringLiteral);
|
||||||
return (token = Token.StringLiteral);
|
|
||||||
}
|
|
||||||
|
|
||||||
function getTokenValue() {
|
|
||||||
if (tokenValue !== undefined) {
|
|
||||||
return tokenValue;
|
|
||||||
}
|
|
||||||
return (tokenValue = token === Token.StringLiteral ? getStringTokenValue() : getTokenText());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function getStringTokenValue() {
|
function getStringTokenValue() {
|
||||||
// strip quotes
|
|
||||||
const quoteLength = tokenFlags & TokenFlags.TripleQuoted ? 3 : 1;
|
const quoteLength = tokenFlags & TokenFlags.TripleQuoted ? 3 : 1;
|
||||||
let value = input.substring(tokenPosition + quoteLength, position - quoteLength);
|
const start = tokenPosition + quoteLength;
|
||||||
|
const end = tokenFlags & TokenFlags.Unterminated ? position : position - quoteLength;
|
||||||
// Normalize CRLF to LF when interpreting value of multi-line string
|
|
||||||
// literals. Matches JavaScript behavior and ensures program behavior does
|
|
||||||
// not change due to line-ending conversion.
|
|
||||||
if (tokenFlags & TokenFlags.HasCrlf) {
|
|
||||||
value = value.replace(/\r\n/g, "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tokenFlags & TokenFlags.TripleQuoted) {
|
if (tokenFlags & TokenFlags.TripleQuoted) {
|
||||||
value = unindentTripleQuoteString(value);
|
return unindentAndUnescapeTripleQuotedString(start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tokenFlags & TokenFlags.Escaped) {
|
if (tokenFlags & TokenFlags.Escaped) {
|
||||||
value = unescapeString(value);
|
return unescapeString(start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (tokenValue = value);
|
let value = input.substring(start, end);
|
||||||
|
if (tokenFlags & TokenFlags.HasCrlf) {
|
||||||
|
value = value.replace(/\r\n/g, "\n");
|
||||||
|
}
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
function unindentTripleQuoteString(text: string) {
|
function unindentAndUnescapeTripleQuotedString(start: number, end: number) {
|
||||||
let start = 0;
|
|
||||||
let end = text.length;
|
|
||||||
|
|
||||||
// ignore leading whitespace before required initial line break
|
// ignore leading whitespace before required initial line break
|
||||||
while (start < end && isWhiteSpaceSingleLine(text.charCodeAt(start))) {
|
while (start < end && isWhiteSpaceSingleLine(input.charCodeAt(start))) {
|
||||||
start++;
|
start++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove required initial line break
|
// remove required initial line break
|
||||||
if (isLineBreak(text.charCodeAt(start))) {
|
if (isLineBreak(input.charCodeAt(start))) {
|
||||||
|
if (isCrlf(start, start, end)) {
|
||||||
|
start++;
|
||||||
|
}
|
||||||
start++;
|
start++;
|
||||||
} else {
|
} else {
|
||||||
error(Message.NoNewLineAtStartOfTripleQuotedString);
|
error(Message.NoNewLineAtStartOfTripleQuotedString);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove whitespace before closing delimiter and record it as
|
// remove whitespace before closing delimiter and record it as required
|
||||||
// required indentation for all lines.
|
// indentation for all lines
|
||||||
while (end > start && isWhiteSpaceSingleLine(text.charCodeAt(end - 1))) {
|
const indentationEnd = end;
|
||||||
|
while (end > start && isWhiteSpaceSingleLine(input.charCodeAt(end - 1))) {
|
||||||
end--;
|
end--;
|
||||||
}
|
}
|
||||||
const indentation = text.substring(end, text.length);
|
const indentationStart = end;
|
||||||
|
|
||||||
// remove required final line break
|
// remove required final line break
|
||||||
if (isLineBreak(text.charCodeAt(end - 1))) {
|
if (isLineBreak(input.charCodeAt(end - 1))) {
|
||||||
|
if (isCrlf(end - 2, start, end)) {
|
||||||
|
end--;
|
||||||
|
}
|
||||||
end--;
|
end--;
|
||||||
} else {
|
} else {
|
||||||
error(Message.NoNewLineAtEndOfTripleQuotedString);
|
error(Message.NoNewLineAtEndOfTripleQuotedString);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove required matching indentation from each line
|
// remove required matching indentation from each line and unescape in the
|
||||||
return removeMatchingIndentation(text, start, end, indentation);
|
// process of doing so
|
||||||
}
|
|
||||||
|
|
||||||
function removeMatchingIndentation(
|
|
||||||
text: string,
|
|
||||||
start: number,
|
|
||||||
end: number,
|
|
||||||
indentation: string
|
|
||||||
) {
|
|
||||||
let result = "";
|
let result = "";
|
||||||
let pos = start;
|
let pos = start;
|
||||||
|
|
||||||
while (pos < end) {
|
while (pos < end) {
|
||||||
start = skipMatchingIndentation(text, pos, end, indentation);
|
// skip indentation at start of line
|
||||||
while (pos < end && !isLineBreak(text.charCodeAt(pos))) {
|
start = skipMatchingIndentation(pos, end, indentationStart, indentationEnd);
|
||||||
pos++;
|
let ch;
|
||||||
|
|
||||||
|
while (pos < end && !isLineBreak((ch = input.charCodeAt(pos)))) {
|
||||||
|
if (ch !== CharCode.Backslash) {
|
||||||
|
pos++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
result += input.substring(start, pos);
|
||||||
|
if (pos === end - 1) {
|
||||||
|
error(Message.InvalidEscapeSequence);
|
||||||
|
pos++;
|
||||||
|
} else {
|
||||||
|
result += unescapeOne(pos);
|
||||||
|
pos += 2;
|
||||||
|
}
|
||||||
|
start = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pos < end) {
|
if (pos < end) {
|
||||||
pos++; // include line break
|
if (isCrlf(pos, start, end)) {
|
||||||
|
// CRLF in multi-line string is normalized to LF in string value.
|
||||||
|
// This keeps program behavior unchanged by line-ending conversion.
|
||||||
|
result += input.substring(start, pos);
|
||||||
|
result += "\n";
|
||||||
|
pos += 2;
|
||||||
|
} else {
|
||||||
|
pos++; // include non-CRLF newline
|
||||||
|
result += input.substring(start, pos);
|
||||||
|
}
|
||||||
|
start = pos;
|
||||||
}
|
}
|
||||||
result += text.substring(start, pos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result += input.substring(start, pos);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function skipMatchingIndentation(text: string, pos: number, end: number, indentation: string) {
|
function isCrlf(pos: number, start: number, end: number) {
|
||||||
end = Math.min(end, pos + indentation.length);
|
return (
|
||||||
|
pos >= start &&
|
||||||
|
pos < end - 1 &&
|
||||||
|
input.charCodeAt(pos) === CharCode.CarriageReturn &&
|
||||||
|
input.charCodeAt(pos + 1) === CharCode.LineFeed
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function skipMatchingIndentation(
|
||||||
|
pos: number,
|
||||||
|
end: number,
|
||||||
|
indentationStart: number,
|
||||||
|
indentationEnd: number
|
||||||
|
) {
|
||||||
|
let indentationPos = indentationStart;
|
||||||
|
end = Math.min(end, pos + (indentationEnd - indentationStart));
|
||||||
|
|
||||||
let indentationPos = 0;
|
|
||||||
while (pos < end) {
|
while (pos < end) {
|
||||||
const ch = text.charCodeAt(pos);
|
const ch = input.charCodeAt(pos);
|
||||||
if (isLineBreak(ch)) {
|
if (isLineBreak(ch)) {
|
||||||
// allow subset of indentation if line has only whitespace
|
// allow subset of indentation if line has only whitespace
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ch != indentation.charCodeAt(indentationPos)) {
|
if (ch !== input.charCodeAt(indentationPos)) {
|
||||||
error(Message.InconsistentTripleQuoteIndentation);
|
error(Message.InconsistentTripleQuoteIndentation);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -724,76 +755,86 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
function unescapeString(text: string) {
|
function unescapeString(start: number, end: number) {
|
||||||
let result = "";
|
let result = "";
|
||||||
let start = 0;
|
let pos = start;
|
||||||
let pos = 0;
|
|
||||||
const end = text.length;
|
|
||||||
|
|
||||||
while (pos < end) {
|
while (pos < end) {
|
||||||
let ch = text.charCodeAt(pos);
|
let ch = input.charCodeAt(pos);
|
||||||
if (ch != CharCode.Backslash) {
|
if (ch !== CharCode.Backslash) {
|
||||||
pos++;
|
pos++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
result += text.substring(start, pos);
|
if (pos === end - 1) {
|
||||||
pos++;
|
error(Message.InvalidEscapeSequence);
|
||||||
ch = text.charCodeAt(pos);
|
break;
|
||||||
|
|
||||||
switch (ch) {
|
|
||||||
case CharCode.r:
|
|
||||||
result += "\r";
|
|
||||||
break;
|
|
||||||
case CharCode.n:
|
|
||||||
result += "\n";
|
|
||||||
break;
|
|
||||||
case CharCode.t:
|
|
||||||
result += "\t";
|
|
||||||
break;
|
|
||||||
case CharCode.DoubleQuote:
|
|
||||||
result += '"';
|
|
||||||
break;
|
|
||||||
case CharCode.Backslash:
|
|
||||||
result += "\\";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
error(Message.InvalidEscapeSequence);
|
|
||||||
result += String.fromCharCode(ch);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pos++;
|
result += input.substring(start, pos);
|
||||||
|
result += unescapeOne(pos);
|
||||||
|
pos += 2;
|
||||||
start = pos;
|
start = pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
result += text.substring(start, pos);
|
result += input.substring(start, pos);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scanIdentifierOrKeyword() {
|
function unescapeOne(pos: number) {
|
||||||
const startChar = input.charCodeAt(position);
|
const ch = input.charCodeAt(pos + 1);
|
||||||
let ch = startChar;
|
switch (ch) {
|
||||||
do {
|
case CharCode.r:
|
||||||
position++;
|
return "\r";
|
||||||
} while (!eof() && isAsciiIdentifierContinue((ch = input.charCodeAt(position))));
|
case CharCode.n:
|
||||||
|
return "\n";
|
||||||
|
case CharCode.t:
|
||||||
|
return "\t";
|
||||||
|
case CharCode.DoubleQuote:
|
||||||
|
return '"';
|
||||||
|
case CharCode.Backslash:
|
||||||
|
return "\\";
|
||||||
|
default:
|
||||||
|
error(Message.InvalidEscapeSequence);
|
||||||
|
return String.fromCharCode(ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ch > CharCode.MaxAscii) {
|
function scanIdentifierOrKeyword() {
|
||||||
const codePoint = input.codePointAt(position)!;
|
let key = 0;
|
||||||
if (isNonAsciiIdentifierContinue(codePoint)) {
|
let count = 0;
|
||||||
return scanNonAsciiIdentifierContinue(codePoint);
|
let ch = input.charCodeAt(position);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
position++;
|
||||||
|
count++;
|
||||||
|
key = (key << 5) | (ch - CharCode.a);
|
||||||
|
|
||||||
|
if (eof()) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ch = input.charCodeAt(position);
|
||||||
|
if (count < KeywordLimit.MaxLength && isLowercaseAsciiLetter(ch)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isAsciiIdentifierContinue(ch)) {
|
||||||
|
return scanIdentifier();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch > CharCode.MaxAscii) {
|
||||||
|
const cp = input.codePointAt(position)!;
|
||||||
|
if (isNonAsciiIdentifierContinue(cp)) {
|
||||||
|
return scanNonAsciiIdentifier(cp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const length = position - tokenPosition;
|
if (count >= KeywordLimit.MinLength && count <= KeywordLimit.MaxLength) {
|
||||||
if (
|
const keyword = KeywordMap.get(key);
|
||||||
length >= KeywordLimit.MinLength &&
|
|
||||||
length <= KeywordLimit.MaxLength &&
|
|
||||||
startChar >= KeywordLimit.MinStartChar &&
|
|
||||||
startChar <= KeywordLimit.MaxStartChar
|
|
||||||
) {
|
|
||||||
tokenValue = getTokenText();
|
|
||||||
const keyword = Keywords.get(tokenValue);
|
|
||||||
if (keyword) {
|
if (keyword) {
|
||||||
return (token = keyword);
|
return (token = keyword);
|
||||||
}
|
}
|
||||||
|
@ -802,11 +843,31 @@ export function createScanner(source: string | SourceFile, onError = throwOnErro
|
||||||
return (token = Token.Identifier);
|
return (token = Token.Identifier);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scanNonAsciiIdentifierContinue(startCodePoint: number) {
|
function scanIdentifier() {
|
||||||
let codePoint = startCodePoint;
|
let ch: number;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
position += utf16CodeUnits(codePoint);
|
position++;
|
||||||
} while (!eof() && isIdentifierContinue((codePoint = input.codePointAt(position)!)));
|
if (eof()) {
|
||||||
|
return (token = Token.Identifier);
|
||||||
|
}
|
||||||
|
} while (isAsciiIdentifierContinue((ch = input.charCodeAt(position))));
|
||||||
|
|
||||||
|
if (ch > CharCode.MaxAscii) {
|
||||||
|
let cp = input.codePointAt(position)!;
|
||||||
|
if (isNonAsciiIdentifierContinue(cp)) {
|
||||||
|
return scanNonAsciiIdentifier(cp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (token = Token.Identifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
function scanNonAsciiIdentifier(startCodePoint: number) {
|
||||||
|
let cp = startCodePoint;
|
||||||
|
do {
|
||||||
|
position += utf16CodeUnits(cp);
|
||||||
|
} while (!eof() && isIdentifierContinue((cp = input.codePointAt(position)!)));
|
||||||
|
|
||||||
return (token = Token.Identifier);
|
return (token = Token.Identifier);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import assert from "assert";
|
import assert from "assert";
|
||||||
|
import { CharCode } from "../compiler/charcode.js";
|
||||||
import { logDiagnostics, logVerboseTestOutput } from "../compiler/diagnostics.js";
|
import { logDiagnostics, logVerboseTestOutput } from "../compiler/diagnostics.js";
|
||||||
import { hasParseError, NodeFlags, parse } from "../compiler/parser.js";
|
import { hasParseError, NodeFlags, parse } from "../compiler/parser.js";
|
||||||
import { ADLScriptNode, SyntaxKind } from "../compiler/types.js";
|
import { ADLScriptNode, SyntaxKind } from "../compiler/types.js";
|
||||||
|
@ -223,16 +224,25 @@ describe("syntax", () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("unterminated tokens", () => {
|
describe("unterminated tokens", () => {
|
||||||
parseErrorEach([
|
parseErrorEach([["/* Yada yada yada", [/Unterminated multi-line comment/]]]);
|
||||||
['alias X = "banana', [/Unterminated string literal/]],
|
|
||||||
['alias X = "banana\\', [/Unterminated string literal/]],
|
const strings = ['"banana', '"banana\\', '"""\nbanana', '"""\nbanana\\'];
|
||||||
['alias X = """\nbanana', [/Unterminated string literal/]],
|
parseErrorEach(
|
||||||
['alias X = """\nbanana\\', [/Unterminated string literal/]],
|
Array.from(strings.entries()).map((e) => [
|
||||||
["/* Yada yada yada", [/Unterminated comment/]],
|
`alias ${String.fromCharCode(CharCode.A + e[0])} = ${e[1]}`,
|
||||||
]);
|
[/Unterminated string literal/],
|
||||||
|
(node) => {
|
||||||
|
const statement = node.statements[0];
|
||||||
|
assert(statement.kind === SyntaxKind.AliasStatement, "alias statement expected");
|
||||||
|
const value = statement.value;
|
||||||
|
assert(value.kind === SyntaxKind.StringLiteral, "string literal expected");
|
||||||
|
assert.strictEqual(value.value, "banana");
|
||||||
|
},
|
||||||
|
])
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("terminated tokens at EOF with missing semicolon", () => {
|
describe("terminated tokens at EOF", () => {
|
||||||
parseErrorEach([
|
parseErrorEach([
|
||||||
["alias X = 0x10101", [/';' expected/]],
|
["alias X = 0x10101", [/';' expected/]],
|
||||||
["alias X = 0xBEEF", [/';' expected/]],
|
["alias X = 0xBEEF", [/';' expected/]],
|
||||||
|
@ -305,16 +315,68 @@ describe("syntax", () => {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("non-ascii identifiers", () => {
|
describe("identifiers", () => {
|
||||||
parseEach([
|
const good = [
|
||||||
"model Incompréhensible {}",
|
"short",
|
||||||
"model 𐌰𐌲 {}",
|
"short42",
|
||||||
"model Banana𐌰𐌲42Banana {}",
|
"lowercaseandlong",
|
||||||
"model deaf\u{200c}ly {}", // ZWNJ
|
"lowercaseandlong42",
|
||||||
"model क्ष {}", // ZWJ
|
"camelCase",
|
||||||
]);
|
"camelCase42",
|
||||||
parseErrorEach([["model 😢 {}", [/Invalid character/]]]);
|
"PascalCase",
|
||||||
|
"PascalCase42",
|
||||||
|
"has_underscore",
|
||||||
|
"has_$dollar",
|
||||||
|
"_startsWithUnderscore",
|
||||||
|
"$startsWithDollar",
|
||||||
|
"Incompréhensible",
|
||||||
|
"incompréhensible",
|
||||||
|
"IncomprÉhensible",
|
||||||
|
"incomprÉhensible",
|
||||||
|
// leading astral character
|
||||||
|
"𐌰𐌲",
|
||||||
|
// continuing astral character
|
||||||
|
"Banana𐌰𐌲42Banana",
|
||||||
|
"banana𐌰𐌲42banana",
|
||||||
|
// ZWNJ
|
||||||
|
"deaf\u{200c}ly",
|
||||||
|
// ZWJ
|
||||||
|
"क्ष",
|
||||||
|
];
|
||||||
|
|
||||||
|
const bad: [string, RegExp][] = [
|
||||||
|
["😢", /Invalid character/],
|
||||||
|
["42", /Identifier expected/],
|
||||||
|
["true", /Keyword cannot be used as identifier/],
|
||||||
|
];
|
||||||
|
|
||||||
|
parseEach(
|
||||||
|
good.map((s) => [
|
||||||
|
`model ${s} {}`,
|
||||||
|
(node) => {
|
||||||
|
const statement = node.statements[0];
|
||||||
|
assert(statement.kind === SyntaxKind.ModelStatement, "Model statement expected.");
|
||||||
|
assert.strictEqual(statement.id.sv, s);
|
||||||
|
},
|
||||||
|
])
|
||||||
|
);
|
||||||
|
|
||||||
|
parseErrorEach(bad.map((e) => [`model ${e[0]} {}`, [e[1]]]));
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// smaller repro of previous regen-samples baseline failures
|
||||||
|
describe("sample regressions", () => {
|
||||||
|
parseEach([
|
||||||
|
[
|
||||||
|
`/* \\n <-- before string! */ @format("\\\\w") model M {}`,
|
||||||
|
(node) => {
|
||||||
|
assert(node.statements[0].kind === SyntaxKind.ModelStatement);
|
||||||
|
assert(node.statements[0].decorators[0].arguments[0].kind === SyntaxKind.StringLiteral);
|
||||||
|
assert.strictEqual(node.statements[0].decorators[0].arguments[0].value, "\\w");
|
||||||
|
},
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
describe("enum statements", () => {
|
describe("enum statements", () => {
|
||||||
parseEach([
|
parseEach([
|
||||||
|
@ -344,7 +406,9 @@ describe("syntax", () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
function parseEach(cases: (string | [string, (node: ADLScriptNode) => void])[]) {
|
type Callback = (node: ADLScriptNode) => void;
|
||||||
|
|
||||||
|
function parseEach(cases: (string | [string, Callback])[]) {
|
||||||
for (const each of cases) {
|
for (const each of cases) {
|
||||||
const code = typeof each === "string" ? each : each[0];
|
const code = typeof each === "string" ? each : each[0];
|
||||||
const callback = typeof each === "string" ? undefined : each[1];
|
const callback = typeof each === "string" ? undefined : each[1];
|
||||||
|
@ -377,13 +441,16 @@ function parseEach(cases: (string | [string, (node: ADLScriptNode) => void])[])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseErrorEach(cases: [string, RegExp[]][]) {
|
function parseErrorEach(cases: [string, RegExp[], Callback?][], significantWhitespace = false) {
|
||||||
for (const [code, matches] of cases) {
|
for (const [code, matches, callback] of cases) {
|
||||||
it(`doesn't parse ${shorten(code)}`, () => {
|
it(`doesn't parse ${shorten(code)}`, () => {
|
||||||
logVerboseTestOutput("=== Source ===");
|
logVerboseTestOutput("=== Source ===");
|
||||||
logVerboseTestOutput(code);
|
logVerboseTestOutput(code);
|
||||||
|
|
||||||
const astNode = parse(code);
|
const astNode = parse(code);
|
||||||
|
if (callback) {
|
||||||
|
callback(astNode);
|
||||||
|
}
|
||||||
logVerboseTestOutput("\n=== Parse Result ===");
|
logVerboseTestOutput("\n=== Parse Result ===");
|
||||||
dumpAST(astNode);
|
dumpAST(astNode);
|
||||||
|
|
||||||
|
@ -404,7 +471,7 @@ function parseErrorEach(cases: [string, RegExp[]][]) {
|
||||||
|
|
||||||
function dumpAST(astNode: ADLScriptNode) {
|
function dumpAST(astNode: ADLScriptNode) {
|
||||||
logVerboseTestOutput((log) => {
|
logVerboseTestOutput((log) => {
|
||||||
const hasErrors = hasParseError(astNode); // force flags to initialize
|
hasParseError(astNode); // force flags to initialize
|
||||||
const json = JSON.stringify(astNode, replacer, 2);
|
const json = JSON.stringify(astNode, replacer, 2);
|
||||||
log(json);
|
log(json);
|
||||||
});
|
});
|
||||||
|
|
|
@ -2,7 +2,7 @@ import assert from "assert";
|
||||||
import { readFile } from "fs/promises";
|
import { readFile } from "fs/promises";
|
||||||
import { URL } from "url";
|
import { URL } from "url";
|
||||||
import { isIdentifierContinue, isIdentifierStart } from "../compiler/charcode.js";
|
import { isIdentifierContinue, isIdentifierStart } from "../compiler/charcode.js";
|
||||||
import { throwOnError } from "../compiler/diagnostics.js";
|
import { createDiagnostic, formatDiagnostic, throwOnError } from "../compiler/diagnostics.js";
|
||||||
import {
|
import {
|
||||||
createScanner,
|
createScanner,
|
||||||
isKeyword,
|
isKeyword,
|
||||||
|
@ -180,11 +180,21 @@ describe("scanner", () => {
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
function scanString(text: string, expectedValue: string) {
|
function scanString(text: string, expectedValue: string, expectedDiagnostic?: RegExp) {
|
||||||
const scanner = createScanner(text);
|
const scanner = createScanner(text, (message, target, args) => {
|
||||||
|
const diagnostic = createDiagnostic(message, target, args);
|
||||||
|
if (expectedDiagnostic) {
|
||||||
|
assert.match(diagnostic.message, expectedDiagnostic);
|
||||||
|
} else {
|
||||||
|
assert.fail("No diagnostic expected, but got " + formatDiagnostic(diagnostic));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
assert.strictEqual(scanner.scan(), Token.StringLiteral);
|
assert.strictEqual(scanner.scan(), Token.StringLiteral);
|
||||||
assert.strictEqual(scanner.token, Token.StringLiteral);
|
assert.strictEqual(scanner.token, Token.StringLiteral);
|
||||||
assert.strictEqual(scanner.getTokenText(), text);
|
if (!expectedDiagnostic) {
|
||||||
|
assert.strictEqual(scanner.getTokenText(), text);
|
||||||
|
}
|
||||||
assert.strictEqual(scanner.getTokenValue(), expectedValue);
|
assert.strictEqual(scanner.getTokenValue(), expectedValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -202,19 +212,24 @@ describe("scanner", () => {
|
||||||
|
|
||||||
it("scans triple-quoted strings", () => {
|
it("scans triple-quoted strings", () => {
|
||||||
scanString(
|
scanString(
|
||||||
|
// NOTE: sloppy blank line formatting and trailing whitespace after open
|
||||||
|
// quotes above is deliberate here and deliberately tolerated by
|
||||||
|
// the scanner.
|
||||||
`"""
|
`"""
|
||||||
This is a triple-quoted string
|
This is a triple-quoted string
|
||||||
|
|
||||||
|
|
||||||
|
"You do not need to escape lone quotes"
|
||||||
And this is another line
|
You can use escape sequences: \\r \\n \\t \\\\ \\"
|
||||||
"""`,
|
"""`,
|
||||||
// NOTE: sloppy blank line formatting and trailing whitespace after open
|
'This is a triple-quoted string\n\n\n"You do not need to escape lone quotes"\nYou can use escape sequences: \r \n \t \\ "'
|
||||||
// quotes above is deliberately tolerated.
|
|
||||||
"This is a triple-quoted string\n\n\n\nAnd this is another line"
|
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("normalizes CRLF to LF in multi-line string", () => {
|
||||||
|
scanString('"""\r\nThis\r\nis\r\na\r\ntest\r\n"""', "This\nis\na\ntest");
|
||||||
|
});
|
||||||
|
|
||||||
it("provides token position", () => {
|
it("provides token position", () => {
|
||||||
const all = tokens("a x\raa x\r\naaa x\naaaa x\u{2028}aaaaa x\u{2029}aaaaaa x");
|
const all = tokens("a x\raa x\r\naaa x\naaaa x\u{2028}aaaaa x\u{2029}aaaaaa x");
|
||||||
verify(all, [
|
verify(all, [
|
||||||
|
@ -263,14 +278,15 @@ describe("scanner", () => {
|
||||||
const nonStatementKeywords = [Token.ExtendsKeyword, Token.TrueKeyword, Token.FalseKeyword];
|
const nonStatementKeywords = [Token.ExtendsKeyword, Token.TrueKeyword, Token.FalseKeyword];
|
||||||
let minKeywordLengthFound = Number.MAX_SAFE_INTEGER;
|
let minKeywordLengthFound = Number.MAX_SAFE_INTEGER;
|
||||||
let maxKeywordLengthFound = Number.MIN_SAFE_INTEGER;
|
let maxKeywordLengthFound = Number.MIN_SAFE_INTEGER;
|
||||||
let minKeywordStartCharFound = Number.MAX_SAFE_INTEGER;
|
|
||||||
let maxKeywordStartCharFound = Number.MIN_SAFE_INTEGER;
|
|
||||||
|
|
||||||
for (const [name, token] of Keywords.entries()) {
|
for (const [name, token] of Keywords) {
|
||||||
|
assert.match(
|
||||||
|
name,
|
||||||
|
/^[a-z]+$/,
|
||||||
|
"We need to change the keyword lookup algorithm in the scanner if we ever add a keyword that is not all lowercase ascii letters."
|
||||||
|
);
|
||||||
minKeywordLengthFound = Math.min(minKeywordLengthFound, name.length);
|
minKeywordLengthFound = Math.min(minKeywordLengthFound, name.length);
|
||||||
maxKeywordLengthFound = Math.max(maxKeywordLengthFound, name.length);
|
maxKeywordLengthFound = Math.max(maxKeywordLengthFound, name.length);
|
||||||
minKeywordStartCharFound = Math.min(minKeywordStartCharFound, name.charCodeAt(0));
|
|
||||||
maxKeywordStartCharFound = Math.max(maxKeywordStartCharFound, name.charCodeAt(0));
|
|
||||||
|
|
||||||
assert.strictEqual(TokenDisplay[token], `'${name}'`);
|
assert.strictEqual(TokenDisplay[token], `'${name}'`);
|
||||||
assert(isKeyword(token), `${name} should be classified as a keyword`);
|
assert(isKeyword(token), `${name} should be classified as a keyword`);
|
||||||
|
@ -289,15 +305,10 @@ describe("scanner", () => {
|
||||||
KeywordLimit.MaxLength,
|
KeywordLimit.MaxLength,
|
||||||
`max keyword length is incorrect, set KeywordLimit.MaxLength to ${maxKeywordLengthFound}`
|
`max keyword length is incorrect, set KeywordLimit.MaxLength to ${maxKeywordLengthFound}`
|
||||||
);
|
);
|
||||||
assert.strictEqual(
|
|
||||||
minKeywordStartCharFound,
|
assert(
|
||||||
KeywordLimit.MinStartChar,
|
maxKeywordLengthFound < 11,
|
||||||
`min keyword start char is incorrect, set KeywordLimit.MinStartChar to ${minKeywordStartCharFound}`
|
"We need to change the keyword lookup algorithm in the scanner if we ever add a keyword with 11 characters or more."
|
||||||
);
|
|
||||||
assert.strictEqual(
|
|
||||||
maxKeywordStartCharFound,
|
|
||||||
KeywordLimit.MaxStartChar,
|
|
||||||
`max keyword start char is incorrect, set KeywordLimit.MaxStartChar to ${maxKeywordStartCharFound}`
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// check single character punctuation
|
// check single character punctuation
|
||||||
|
@ -317,15 +328,15 @@ describe("scanner", () => {
|
||||||
|
|
||||||
// check the rest
|
// check the rest
|
||||||
assert.strictEqual(TokenDisplay[Token.Elipsis], "'...'");
|
assert.strictEqual(TokenDisplay[Token.Elipsis], "'...'");
|
||||||
assert.strictEqual(TokenDisplay[Token.None], "<none>");
|
assert.strictEqual(TokenDisplay[Token.None], "none");
|
||||||
assert.strictEqual(TokenDisplay[Token.Invalid], "<invalid>");
|
assert.strictEqual(TokenDisplay[Token.Invalid], "invalid");
|
||||||
assert.strictEqual(TokenDisplay[Token.EndOfFile], "<end of file>");
|
assert.strictEqual(TokenDisplay[Token.EndOfFile], "end of file");
|
||||||
assert.strictEqual(TokenDisplay[Token.SingleLineComment], "<single-line comment>");
|
assert.strictEqual(TokenDisplay[Token.SingleLineComment], "single-line comment");
|
||||||
assert.strictEqual(TokenDisplay[Token.MultiLineComment], "<multi-line comment>");
|
assert.strictEqual(TokenDisplay[Token.MultiLineComment], "multi-line comment");
|
||||||
assert.strictEqual(TokenDisplay[Token.NewLine], "<newline>");
|
assert.strictEqual(TokenDisplay[Token.NewLine], "newline");
|
||||||
assert.strictEqual(TokenDisplay[Token.Whitespace], "<whitespace>");
|
assert.strictEqual(TokenDisplay[Token.Whitespace], "whitespace");
|
||||||
assert.strictEqual(TokenDisplay[Token.ConflictMarker], "<conflict marker>");
|
assert.strictEqual(TokenDisplay[Token.ConflictMarker], "conflict marker");
|
||||||
assert.strictEqual(TokenDisplay[Token.Identifier], "<identifier>");
|
assert.strictEqual(TokenDisplay[Token.Identifier], "identifier");
|
||||||
});
|
});
|
||||||
|
|
||||||
// Search for Other_ID_Start in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
// Search for Other_ID_Start in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
||||||
|
|
Загрузка…
Ссылка в новой задаче