Allow ZWJ and ZWNJ to continue identifiers as specified
This commit is contained in:
Родитель
5fb28d6378
Коммит
3ac9719b17
|
@ -223,6 +223,13 @@ export function isAsciiIdentifierContinue(ch: number): boolean {
|
|||
);
|
||||
}
|
||||
|
||||
export function isIdentifierStart(codePoint: number) {
|
||||
return (
|
||||
isAsciiIdentifierStart(codePoint) ||
|
||||
(codePoint > CharCode.MaxAscii && isNonAsciiIdentifierStart(codePoint))
|
||||
);
|
||||
}
|
||||
|
||||
export function isIdentifierContinue(codePoint: number) {
|
||||
return (
|
||||
isAsciiIdentifierContinue(codePoint) ||
|
||||
|
|
|
@ -4,15 +4,19 @@
|
|||
//
|
||||
// Based on:
|
||||
// - http://www.unicode.org/reports/tr31/
|
||||
// - https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
|
||||
// - https://www.ecma-international.org/ecma-262/11.0/#sec-names-and-keywords
|
||||
//
|
||||
// ADL's identifier naming rules are currently the same as JavaScript's.
|
||||
//
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*
|
||||
* Map of non-ascii characters that are valid at the start of an identifier.
|
||||
* Each pair of numbers represents an inclusive range of code points.
|
||||
*
|
||||
* Corresponds to code points outside the ASCII range with property ID_Start or
|
||||
* Other_ID_Start.
|
||||
*/
|
||||
// prettier-ignore
|
||||
export const nonAsciiIdentifierStartMap: readonly number[] = [
|
||||
|
@ -641,8 +645,12 @@ export const nonAsciiIdentifierStartMap: readonly number[] = [
|
|||
/**
|
||||
* @internal
|
||||
*
|
||||
* Map of non-ascii characters that are valid after the first character in an identifier.
|
||||
* Each pair of numbers represents an inclusive range of code points.
|
||||
* Map of non-ascii characters that are valid after the first character in an
|
||||
* identifier. Each pair of numbers represents an inclusive range of code
|
||||
* points.
|
||||
*
|
||||
* Corresponds to code points outside the ASCII range with property ID_Continue,
|
||||
* Other_ID_Start, or Other_ID_Continue, plus ZWNJ and ZWJ.
|
||||
*/
|
||||
//prettier-ignore
|
||||
export const nonAsciiIdentifierContinueMap: readonly number[] = [
|
||||
|
@ -943,6 +951,7 @@ export const nonAsciiIdentifierContinueMap: readonly number[] = [
|
|||
0x1fe0, 0x1fec,
|
||||
0x1ff2, 0x1ff4,
|
||||
0x1ff6, 0x1ffc,
|
||||
0x200c, 0x200d,
|
||||
0x203f, 0x2040,
|
||||
0x2054, 0x2054,
|
||||
0x2071, 0x2071,
|
||||
|
|
|
@ -9,8 +9,11 @@ import { fileURLToPath } from "url";
|
|||
const MIN_NONASCII_CODEPOINT = 0x80;
|
||||
const MAX_UNICODE_CODEPOINT = 0x10ffff;
|
||||
|
||||
const isStartRegex = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
|
||||
const isContinueRegex = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;
|
||||
// Includes Other_ID_Start
|
||||
const isStartRegex = /[\p{ID_Start}]/u;
|
||||
|
||||
// Includes Other_ID_Start and Other_ID_Continue
|
||||
const isContinueRegex = /[\p{ID_Continue}\u{200c}\u{200d}]/u;
|
||||
|
||||
function isStart(c) {
|
||||
return isStartRegex.test(c);
|
||||
|
@ -50,15 +53,19 @@ const src = `//
|
|||
//
|
||||
// Based on:
|
||||
// - http://www.unicode.org/reports/tr31/
|
||||
// - https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
|
||||
// - https://www.ecma-international.org/ecma-262/11.0/#sec-names-and-keywords
|
||||
//
|
||||
// ADL's identifier naming rules are currently the same as JavaScript's.
|
||||
//
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*
|
||||
* Map of non-ascii characters that are valid at the start of an identifier.
|
||||
* Each pair of numbers represents an inclusive range of code points.
|
||||
*
|
||||
* Corresponds to code points outside the ASCII range with property ID_Start or
|
||||
* Other_ID_Start.
|
||||
*/
|
||||
// prettier-ignore
|
||||
export const nonAsciiIdentifierStartMap: readonly number[] = [
|
||||
|
@ -68,8 +75,12 @@ ${formatPairs(startMap)}
|
|||
/**
|
||||
* @internal
|
||||
*
|
||||
* Map of non-ascii characters that are valid after the first character in an identifier.
|
||||
* Each pair of numbers represents an inclusive range of code points.
|
||||
* Map of non-ascii characters that are valid after the first character in an
|
||||
* identifier. Each pair of numbers represents an inclusive range of code
|
||||
* points.
|
||||
*
|
||||
* Corresponds to code points outside the ASCII range with property ID_Continue,
|
||||
* Other_ID_Start, or Other_ID_Continue, plus ZWNJ and ZWJ.
|
||||
*/
|
||||
//prettier-ignore
|
||||
export const nonAsciiIdentifierContinueMap: readonly number[] = [
|
||||
|
|
|
@ -250,7 +250,13 @@ describe("syntax", () => {
|
|||
});
|
||||
|
||||
describe("non-ascii identifiers", () => {
|
||||
parseEach(["model Incompréhensible {}", "model 𐌰𐌲 {}", "model Banana𐌰𐌲42Banana {}"]);
|
||||
parseEach([
|
||||
"model Incompréhensible {}",
|
||||
"model 𐌰𐌲 {}",
|
||||
"model Banana𐌰𐌲42Banana {}",
|
||||
"model deaf\u{200c}ly {}", // ZWNJ
|
||||
"model क्ष {}", // ZWJ
|
||||
]);
|
||||
parseErrorEach([["model 😢 {}", [/Invalid character/]]]);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import assert from "assert";
|
||||
import { readFile } from "fs/promises";
|
||||
import { URL } from "url";
|
||||
import { isIdentifierContinue, isIdentifierStart } from "../compiler/charcode.js";
|
||||
import { throwOnError } from "../compiler/diagnostics.js";
|
||||
import {
|
||||
createScanner,
|
||||
|
@ -311,6 +312,54 @@ describe("scanner", () => {
|
|||
assert.strictEqual(TokenDisplay[Token.Identifier], "<identifier>");
|
||||
});
|
||||
|
||||
// Search for Other_ID_Start in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
||||
const otherIDStart = [0x1885, 0x1886, 0x2118, 0x212e, 0x309b, 0x309c];
|
||||
|
||||
// Search for Other_ID_Continue in https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
||||
const otherIdContinue = [
|
||||
0x00b7,
|
||||
0x0387,
|
||||
0x1369,
|
||||
0x136a,
|
||||
0x136b,
|
||||
0x136c,
|
||||
0x136d,
|
||||
0x136e,
|
||||
0x136f,
|
||||
0x1370,
|
||||
0x1371,
|
||||
0x19da,
|
||||
];
|
||||
|
||||
it("allows additional identifier start characters", () => {
|
||||
assert(isIdentifierStart("$".codePointAt(0)!), "'$' should be allowed to start identifier.");
|
||||
assert(isIdentifierStart("_".codePointAt(0)!), "'_' should be allowed to start identifier.");
|
||||
|
||||
for (const codePoint of otherIDStart) {
|
||||
assert(
|
||||
isIdentifierStart(codePoint),
|
||||
`U+${codePoint.toString(16)} should be allowed to start identifier.`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("allows additional identifier continuation characters", () => {
|
||||
//prettier-ignore
|
||||
assert(isIdentifierContinue("$".codePointAt(0)!), "'$' should be allowed to continue identifier.");
|
||||
//prettier-ignore
|
||||
assert(isIdentifierContinue("_".codePointAt(0)!), "'_' should be allowed to continue identifier.");
|
||||
|
||||
for (const codePoint of [...otherIDStart, ...otherIdContinue]) {
|
||||
assert(
|
||||
isIdentifierContinue(codePoint),
|
||||
`U+${codePoint.toString(16)} should be allowed to continue identifier.`
|
||||
);
|
||||
}
|
||||
|
||||
assert(isIdentifierContinue(0x200c), "U+200C (ZWNJ) should be allowed to continue identifier.");
|
||||
assert(isIdentifierContinue(0x200d), "U+200D (ZWJ) should be allowed to continue identifier.");
|
||||
});
|
||||
|
||||
it("scans this file", async () => {
|
||||
const text = await readFile(new URL(import.meta.url), "utf-8");
|
||||
tokens(text, function () {
|
||||
|
|
Загрузка…
Ссылка в новой задаче