Allow ZWJ and ZWNJ to continue identifiers as specified

This commit is contained in:
Nick Guerrera 2021-04-24 18:45:40 -07:00
Родитель 5fb28d6378
Коммит 3ac9719b17
5 изменённых файлов: 91 добавлений и 9 удалений

Просмотреть файл

@ -223,6 +223,13 @@ export function isAsciiIdentifierContinue(ch: number): boolean {
);
}
/**
 * Determines whether a code point may appear as the first character of an
 * identifier.
 *
 * The code point is accepted when `isAsciiIdentifierStart` accepts it, or when
 * it lies above `CharCode.MaxAscii` and `isNonAsciiIdentifierStart` accepts it.
 *
 * @param codePoint The code point to classify.
 * @returns `true` if the code point can start an identifier, otherwise `false`.
 */
export function isIdentifierStart(codePoint: number): boolean {
  return (
    isAsciiIdentifierStart(codePoint) ||
    // The range guard short-circuits, so the non-ASCII check is only invoked
    // for code points above the ASCII range.
    (codePoint > CharCode.MaxAscii && isNonAsciiIdentifierStart(codePoint))
  );
}
export function isIdentifierContinue(codePoint: number) {
return (
isAsciiIdentifierContinue(codePoint) ||

Просмотреть файл

@ -4,15 +4,19 @@
//
// Based on:
// - http://www.unicode.org/reports/tr31/
// - https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
// - https://www.ecma-international.org/ecma-262/11.0/#sec-names-and-keywords
//
// ADL's identifier naming rules are currently the same as JavaScript's.
//
/**
* @internal
*
* Map of non-ascii characters that are valid at the start of an identifier.
* Each pair of numbers represents an inclusive range of code points.
*
* Corresponds to code points outside the ASCII range with property ID_Start or
* Other_ID_Start.
*/
// prettier-ignore
export const nonAsciiIdentifierStartMap: readonly number[] = [
@ -641,8 +645,12 @@ export const nonAsciiIdentifierStartMap: readonly number[] = [
/**
* @internal
*
* Map of non-ascii chacters that are valid after the first character in and identifier.
* Each pair of numbers represents an inclusive range of code points.
* Map of non-ascii characters that are valid after the first character in an
* identifier. Each pair of numbers represents an inclusive range of code
* points.
*
* Corresponds to code points outside the ASCII range with property ID_Continue,
* Other_ID_Start, or Other_ID_Continue, plus ZWNJ and ZWJ.
*/
//prettier-ignore
export const nonAsciiIdentifierContinueMap: readonly number[] = [
@ -943,6 +951,7 @@ export const nonAsciiIdentifierContinueMap: readonly number[] = [
0x1fe0, 0x1fec,
0x1ff2, 0x1ff4,
0x1ff6, 0x1ffc,
0x200c, 0x200d,
0x203f, 0x2040,
0x2054, 0x2054,
0x2071, 0x2071,

Просмотреть файл

@ -9,8 +9,11 @@ import { fileURLToPath } from "url";
const MIN_NONASCII_CODEPOINT = 0x80;
const MAX_UNICODE_CODEPOINT = 0x10ffff;
const isStartRegex = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
const isContinueRegex = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;
// Includes Other_ID_Start
const isStartRegex = /[\p{ID_Start}]/u;
// Includes Other_ID_Start and Other_ID_Continue
const isContinueRegex = /[\p{ID_Continue}\u{200c}\u{200d}]/u;
function isStart(c) {
return isStartRegex.test(c);
@ -50,15 +53,19 @@ const src = `//
//
// Based on:
// - http://www.unicode.org/reports/tr31/
// - https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
// - https://www.ecma-international.org/ecma-262/11.0/#sec-names-and-keywords
//
// ADL's identifier naming rules are currently the same as JavaScript's.
//
/**
* @internal
*
* Map of non-ascii characters that are valid at the start of an identifier.
* Each pair of numbers represents an inclusive range of code points.
*
* Corresponds to code points outside the ASCII range with property ID_Start or
* Other_ID_Start.
*/
// prettier-ignore
export const nonAsciiIdentifierStartMap: readonly number[] = [
@ -68,8 +75,12 @@ ${formatPairs(startMap)}
/**
* @internal
*
* Map of non-ascii chacters that are valid after the first character in and identifier.
* Each pair of numbers represents an inclusive range of code points.
* Map of non-ascii characters that are valid after the first character in an
* identifier. Each pair of numbers represents an inclusive range of code
* points.
*
* Corresponds to code points outside the ASCII range with property ID_Continue,
* Other_ID_Start, or Other_ID_Continue, plus ZWNJ and ZWJ.
*/
//prettier-ignore
export const nonAsciiIdentifierContinueMap: readonly number[] = [

Просмотреть файл

@ -250,7 +250,13 @@ describe("syntax", () => {
});
describe("non-ascii identifiers", () => {
parseEach(["model Incompréhensible {}", "model 𐌰𐌲 {}", "model Banana𐌰𐌲42Banana {}"]);
// Identifiers containing non-ASCII code points, including the zero-width
// joiners that are only permitted after the first character. The zero-width
// characters are written as escapes so they stay visible in the source and
// cannot be silently dropped by an editor or formatter.
parseEach([
  "model Incompréhensible {}",
  "model 𐌰𐌲 {}",
  "model Banana𐌰𐌲42Banana {}",
  "model deaf\u{200c}ly {}", // ZWNJ
  "model क्\u{200d}ष {}", // ZWJ
]);
parseErrorEach([["model 😢 {}", [/Invalid character/]]]);
});
});

Просмотреть файл

@ -1,6 +1,7 @@
import assert from "assert";
import { readFile } from "fs/promises";
import { URL } from "url";
import { isIdentifierContinue, isIdentifierStart } from "../compiler/charcode.js";
import { throwOnError } from "../compiler/diagnostics.js";
import {
createScanner,
@ -311,6 +312,54 @@ describe("scanner", () => {
assert.strictEqual(TokenDisplay[Token.Identifier], "<identifier>");
});
// Code points that the tests below expect both isIdentifierStart and
// isIdentifierContinue to accept.
// Source: search for Other_ID_Start in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
// NOTE(review): this list may need updating when PropList.txt changes — confirm
// against the Unicode version the identifier maps were generated from.
const otherIDStart = [0x1885, 0x1886, 0x2118, 0x212e, 0x309b, 0x309c];
// Code points that the tests below expect isIdentifierContinue to accept.
// Source: search for Other_ID_Continue in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
// NOTE(review): same caveat as above — verify when the Unicode data is updated.
const otherIdContinue = [
0x00b7,
0x0387,
0x1369,
0x136a,
0x136b,
0x136c,
0x136d,
0x136e,
0x136f,
0x1370,
0x1371,
0x19da,
];
// Verifies that isIdentifierStart accepts '$', '_', and every code point
// listed in otherIDStart.
it("allows additional identifier start characters", () => {
  assert(isIdentifierStart("$".codePointAt(0)!), "'$' should be allowed to start identifier.");
  assert(isIdentifierStart("_".codePointAt(0)!), "'_' should be allowed to start identifier.");
  for (const codePoint of otherIDStart) {
    // Report failures in conventional U+XXXX notation (uppercase hex, padded
    // to at least four digits) so the code point is easy to look up.
    const label = `U+${codePoint.toString(16).toUpperCase().padStart(4, "0")}`;
    assert(isIdentifierStart(codePoint), `${label} should be allowed to start identifier.`);
  }
});
// Verifies that isIdentifierContinue accepts '$', '_', every code point listed
// in otherIDStart and otherIdContinue, and the two zero-width joiners.
it("allows additional identifier continuation characters", () => {
  //prettier-ignore
  assert(isIdentifierContinue("$".codePointAt(0)!), "'$' should be allowed to continue identifier.");
  //prettier-ignore
  assert(isIdentifierContinue("_".codePointAt(0)!), "'_' should be allowed to continue identifier.");
  for (const codePoint of [...otherIDStart, ...otherIdContinue]) {
    // Use the same U+XXXX notation (uppercase hex, padded to at least four
    // digits) as the ZWNJ/ZWJ messages below; a bare toString(16) would
    // print 0x00b7 as "U+b7".
    const label = `U+${codePoint.toString(16).toUpperCase().padStart(4, "0")}`;
    assert(isIdentifierContinue(codePoint), `${label} should be allowed to continue identifier.`);
  }
  assert(isIdentifierContinue(0x200c), "U+200C (ZWNJ) should be allowed to continue identifier.");
  assert(isIdentifierContinue(0x200d), "U+200D (ZWJ) should be allowed to continue identifier.");
});
it("scans this file", async () => {
const text = await readFile(new URL(import.meta.url), "utf-8");
tokens(text, function () {