Check in a script to regenerate non-ascii identifier maps (#483)
This commit is contained in:
Родитель
3b35f162c1
Коммит
5de3f5347b
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -39,6 +39,7 @@
|
|||
"dogfood": "node scripts/dogfood.js",
|
||||
"test": "mocha --timeout 5000 --require source-map-support/register --ignore 'dist/test/manual/**/*.js' 'dist/test/**/*.js'",
|
||||
"regen-samples": "node scripts/regen-samples.js",
|
||||
"regen-nonascii-maps": "node scripts/regen-nonascii-maps.js",
|
||||
"fuzz": "node dist/test/manual/fuzz.js"
|
||||
},
|
||||
"dependencies": {
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
// Regenerate the tables used to scan non-ASCII identifiers.
|
||||
// ADL's rules currently follow JavaScript's rules and this script is adapted from:
|
||||
// https://github.com/microsoft/TypeScript/blob/c552a4bf827cf56680d5cb1510dd948a3787ea38/scripts/regenerate-unicode-identifier-parts.js
|
||||
|
||||
import { writeFileSync } from "fs";
|
||||
import { resolve } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
// First code point outside the ASCII range; the scan loop starts here.
const MIN_NONASCII_CODEPOINT = 0x80;
// Upper bound used for the scan loop (0x10ffff).
const MAX_UNICODE_CODEPOINT = 0x10ffff;

// Matches one character that has the Unicode ID_Start property or is one of
// the four explicitly listed code points.
// NOTE(review): the listed code points look like UAX #31 Other_ID_Start,
// spelled out as in the upstream TypeScript script — confirm.
const isStartRegex = /[\p{ID_Start}\u{2118}\u{212E}\u{309B}\u{309C}]/u;
// Matches one character that has the Unicode ID_Continue property or is one of
// the explicitly listed code points.
// NOTE(review): the listed code points look like UAX #31 Other_ID_Continue — confirm.
const isContinueRegex = /[\p{ID_Continue}\u{00B7}\u{0387}\u{19DA}\u{1369}\u{136A}\u{136B}\u{136C}\u{136D}\u{136E}\u{136F}\u{1370}\u{1371}]/u;
|
||||
|
||||
/**
 * Reports whether `c` matches the identifier-start pattern.
 *
 * @param c String to check; the scan loop passes a single code point.
 * @returns `true` when `isStartRegex` finds a match in `c`.
 */
function isStart(c) {
  const match = isStartRegex.exec(c);
  return match !== null;
}
|
||||
|
||||
/**
 * Reports whether `c` may appear after the first character of an identifier.
 * Anything accepted as an identifier start is also accepted here.
 *
 * @param c String to check; the scan loop passes a single code point.
 * @returns `true` when `c` matches the start or the continue pattern.
 */
function isContinue(c) {
  // Both checks are side-effect free (the regexes carry no `g`/`y` flag), so
  // the order in which they run does not affect the result.
  if (isStart(c)) {
    return true;
  }
  return isContinueRegex.test(c);
}
|
||||
|
||||
/**
 * Renders a flat list of range bounds as source lines, one pair per line.
 *
 * @param array Numbers laid out as [first0, last0, first1, last1, ...].
 * @returns One line per pair in lowercase hex, each ending with a comma,
 *   joined by newlines with no trailing newline; "" for an empty array.
 */
function formatPairs(array) {
  const rows = [];
  for (let index = 0; index < array.length; index += 2) {
    const first = array[index].toString(16);
    const last = array[index + 1].toString(16);
    rows.push(` 0x${first}, 0x${last},`);
  }
  return rows.join("\n");
}
|
||||
|
||||
// Flat lists of inclusive [first, last] code point pairs, built by walking the
// non-ASCII code space once and recording every point where membership flips.
const continueMap = [];
const startMap = [];
// Whether the previous code point was inside a start / continue range.
let continueActive = false;
let startActive = false;

// The upper bound is inclusive so the final code point is examined as well.
for (let i = MIN_NONASCII_CODEPOINT; i <= MAX_UNICODE_CODEPOINT; i++) {
  const ch = String.fromCodePoint(i);
  if (isStart(ch) !== startActive) {
    // Leaving a range: the previous code point was its last member.
    // Entering a range: the current code point is its first member.
    startMap.push(startActive ? i - 1 : i);
    startActive = !startActive;
  }
  if (isContinue(ch) !== continueActive) {
    continueMap.push(continueActive ? i - 1 : i);
    continueActive = !continueActive;
  }
}

// Close any range that is still open at the end of the code space so both maps
// always hold complete pairs (an odd-length map would break pair formatting).
if (startActive) {
  startMap.push(MAX_UNICODE_CODEPOINT);
}
if (continueActive) {
  continueMap.push(MAX_UNICODE_CODEPOINT);
}
|
||||
|
||||
// Text of the generated TypeScript module. The header records the node and
// Unicode versions that produced the tables, so a regeneration under a
// different runtime is visible in the diff.
const src = `//
// Generated by scripts/regen-nonascii-maps.js on node ${process.version} with
// unicode ${process.versions.unicode}.
//
// Based on:
// - http://www.unicode.org/reports/tr31/
// - https://www.ecma-international.org/ecma-262/6.0/#sec-names-and-keywords
//
// ADL's identifier naming rules are currently the same as JavaScript's.

/**
* @internal
*
* Map of non-ascii characters that are valid at the start of an identifier.
* Each pair of numbers represents an inclusive range of code points.
*/
// prettier-ignore
export const nonAsciiIdentifierStartMap: readonly number[] = [
${formatPairs(startMap)}
];

/**
* @internal
*
* Map of non-ascii characters that are valid after the first character in an identifier.
* Each pair of numbers represents an inclusive range of code points.
*/
//prettier-ignore
export const nonAsciiIdentifierContinueMap: readonly number[] = [
${formatPairs(continueMap)}
];
`;

// Resolve the output path relative to this script: the first ".." drops the
// script's own file name, the second climbs out of its directory.
const file = resolve(fileURLToPath(import.meta.url), "../../compiler/non-ascii-maps.ts");
writeFileSync(file, src);
|
Загрузка…
Ссылка в новой задаче