Fixes microsoft/vscode#119918: Avoid using oniguruma find options since they cause freezes tokenizing minified files with the JS grammar

This commit is contained in:
Alex Dima 2021-03-26 16:02:20 +01:00
Родитель b3410c6635
Коммит eeff31f4c7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 6E58D7B045760DA0
10 изменённых файлов: 3828 добавлений и 19 удалений

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -33,7 +33,7 @@ async function tokenizeFile(filePath, scope, message) {
function loadGrammar(scopeName) {
let grammarPath = null;
if (scopeName === 'source.js') {
grammarPath = path.resolve(__dirname, '..', 'test-cases/first-mate/fixtures/javascript.json');
grammarPath = path.resolve(__dirname, 'JavaScript.tmLanguage.json');
} else if (scopeName === 'source.ts') {
grammarPath = path.resolve(__dirname, '..', 'test-cases/themes/syntaxes/TypeScript.tmLanguage.json');
} else if (scopeName === 'source.css') {
@ -54,6 +54,7 @@ async function test() {
await tokenizeFile(path.join(__dirname, 'bootstrap.min.css.txt'), 'source.css', 'Bootstrap CSS v3.1.1 minified')
await tokenizeFile(path.join(__dirname, 'large.min.js.txt'), 'source.js', 'jQuery v2.0.3 minified');
await tokenizeFile(path.join(__dirname, 'main.08642f99.css.txt'), 'source.css', 'Bootstrap with multi-byte minified')
await tokenizeFile(path.join(__dirname, 'minified.js.txt'), 'source.js', 'Simple minified file');
};
test();

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

6
package-lock.json сгенерированный
Просмотреть файл

@ -1445,9 +1445,9 @@
"dev": true
},
"vscode-oniguruma": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.5.0.tgz",
"integrity": "sha512-5MgXSXvwy2GBy8BRi1Pegk+60LAHiY184cW93Fe1SmA9oPu8fRV3G9Yurbhkh/0hrb0E0/VK4JPieBX66gkftQ==",
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.5.1.tgz",
"integrity": "sha512-JrBZH8DCC262TEYcYdeyZusiETu0Vli0xFgdRwNJjDcObcRjbmJP+IFcA3ScBwIXwgFHYKbAgfxtM/Cl+3Spjw==",
"dev": true
},
"watchpack": {

Просмотреть файл

@ -31,7 +31,7 @@
"@types/tape": "^4.13.0",
"tape": "^5.2.2",
"typescript": "^4.2.3",
"vscode-oniguruma": "^1.5.0",
"vscode-oniguruma": "^1.5.1",
"webpack": "^5.24.4",
"webpack-cli": "^4.5.0"
}

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@ -5,3 +5,5 @@
/**
 * Debug switches for the tokenizer.
 * `InDebugMode` is true only when a global `process` exists and its
 * `VSCODE_TEXTMATE_DEBUG` environment variable holds a truthy (non-empty) value.
 */
export const DebugFlags = {
InDebugMode: (typeof process !== 'undefined' && !!process.env['VSCODE_TEXTMATE_DEBUG'])
};
/**
 * Selects how the `\A` / `\G` anchors are handled when searching with a rule.
 * When `false`, the search helpers compile rules through the `compileAG` /
 * `compileWhileAG` variants and pass `FindOption.None` to the scanner; when
 * `true`, they use `compile` / `compileWhile` together with `getFindOptions`.
 * Kept `false` because oniguruma find options caused freezes when tokenizing
 * minified files with the JS grammar (microsoft/vscode#119918).
 */
export const UseOnigurumaFindOptions = false;

Просмотреть файл

@ -7,7 +7,7 @@ import { IRawGrammar, IRawRepository, IRawRule, IOnigLib, IOnigCaptureIndex, Oni
import { IRuleRegistry, IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, BeginWhileRule, MatchRule, CompiledRule } from './rule';
import { createMatchers, Matcher } from './matcher';
import { MetadataConsts, IGrammar, ITokenizeLineResult, ITokenizeLineResult2, IToken, IEmbeddedLanguagesMap, StandardTokenType, StackElement as StackElementDef, ITokenTypeMap } from './main';
import { DebugFlags } from './debug';
import { DebugFlags, UseOnigurumaFindOptions } from './debug';
import { FontStyle, ThemeTrieElementRule } from './theme';
declare let performance: { now: () => number } | undefined;
@ -664,6 +664,26 @@ function getFindOptions(allowA: boolean, allowG: boolean): number {
return options;
}
/**
 * Chooses the compiled scanner and the find options to use when searching with `rule`.
 *
 * With `UseOnigurumaFindOptions` off, the rule is compiled through `compileAG`
 * for the given anchor flags and the scanner is run with `FindOption.None`.
 * With it on, the rule is compiled through `compile` and the anchor flags are
 * turned into find options by `getFindOptions`.
 *
 * @param rule The rule to compile.
 * @param grammar The grammar passed through to the rule's compile method.
 * @param endRegexSource The end pattern source forwarded to the rule, or `null`.
 * @param allowA Anchor flag forwarded to `compileAG` / `getFindOptions`.
 * @param allowG Anchor flag forwarded to `compileAG` / `getFindOptions`.
 * @returns The scanner to search with and the find options to pass to it.
 */
function prepareRuleSearch(rule: Rule, grammar: Grammar, endRegexSource: string | null, allowA: boolean, allowG: boolean): { ruleScanner: CompiledRule; findOptions: number; } {
    if (!UseOnigurumaFindOptions) {
        // The anchor flags are baked into the compiled patterns, so no find options are needed.
        return {
            ruleScanner: rule.compileAG(grammar, endRegexSource, allowA, allowG),
            findOptions: FindOption.None
        };
    }
    return {
        ruleScanner: rule.compile(grammar, endRegexSource),
        findOptions: getFindOptions(allowA, allowG)
    };
}
/**
 * Chooses the compiled `while` scanner and the find options for a begin/while rule.
 *
 * With `UseOnigurumaFindOptions` off, the while pattern is compiled through
 * `compileWhileAG` for the given anchor flags and searched with `FindOption.None`.
 * With it on, `compileWhile` is used and the anchor flags are turned into find
 * options by `getFindOptions`.
 *
 * @param rule The begin/while rule whose while pattern is compiled.
 * @param grammar The grammar passed through to the rule's compile method.
 * @param endRegexSource The pattern source forwarded to the rule, or `null`.
 * @param allowA Anchor flag forwarded to `compileWhileAG` / `getFindOptions`.
 * @param allowG Anchor flag forwarded to `compileWhileAG` / `getFindOptions`.
 * @returns The scanner to search with and the find options to pass to it.
 */
function prepareRuleWhileSearch(rule: BeginWhileRule, grammar: Grammar, endRegexSource: string | null, allowA: boolean, allowG: boolean): { ruleScanner: CompiledRule; findOptions: number; } {
    if (!UseOnigurumaFindOptions) {
        // The anchor flags are baked into the compiled pattern, so no find options are needed.
        return {
            ruleScanner: rule.compileWhileAG(grammar, endRegexSource, allowA, allowG),
            findOptions: FindOption.None
        };
    }
    return {
        ruleScanner: rule.compileWhile(grammar, endRegexSource),
        findOptions: getFindOptions(allowA, allowG)
    };
}
function matchInjections(injections: Injection[], grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchInjectionsResult | null {
// The lower the better
let bestMatchRating = Number.MAX_VALUE;
@ -679,8 +699,8 @@ function matchInjections(injections: Injection[], grammar: Grammar, lineText: On
// injection selector doesn't match stack
continue;
}
const ruleScanner = grammar.getRule(injection.ruleId).compile(grammar, null);
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
const rule = grammar.getRule(injection.ruleId);
const { ruleScanner, findOptions } = prepareRuleSearch(rule, grammar, null, isFirstLine, linePos === anchorPosition);
const matchResult = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
if (DebugFlags.InDebugMode) {
console.log(' scanning for injections');
@ -726,8 +746,7 @@ interface IMatchResult {
function matchRule(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchResult | null {
const rule = stack.getRule(grammar);
const ruleScanner = rule.compile(grammar, stack.endRule);
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
const { ruleScanner, findOptions } = prepareRuleSearch(rule, grammar, stack.endRule, isFirstLine, linePos === anchorPosition);
let perfStart = 0;
if (DebugFlags.InDebugMode) {
@ -821,8 +840,7 @@ function _checkWhileConditions(grammar: Grammar, lineText: OnigString, isFirstLi
}
for (let whileRule = whileRules.pop(); whileRule; whileRule = whileRules.pop()) {
const ruleScanner = whileRule.rule.compileWhile(grammar, whileRule.stack.endRule);
const findOptions = getFindOptions(isFirstLine, anchorPosition === linePos);
const { ruleScanner, findOptions } = prepareRuleWhileSearch(whileRule.rule, grammar, whileRule.stack.endRule, isFirstLine, linePos === anchorPosition);
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
if (DebugFlags.InDebugMode) {
console.log(' scanning for while rule');

Просмотреть файл

@ -83,6 +83,8 @@ export abstract class Rule {
public abstract collectPatternsRecursive(grammar: IRuleRegistry, out: RegExpSourceList, isFirst: boolean): void;
/**
 * Compiles this rule into a `CompiledRule` scanner without taking anchor
 * flags into account.
 *
 * @param grammar Rule registry and oniguruma library used for compilation.
 * @param endRegexSource End pattern source supplied by the caller, or `null`.
 */
public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule;
/**
 * Compiles this rule into a `CompiledRule` scanner for a specific combination
 * of the `allowA` / `allowG` anchor flags.
 *
 * @param grammar Rule registry and oniguruma library used for compilation.
 * @param endRegexSource End pattern source supplied by the caller, or `null`.
 * @param allowA Anchor flag for `\A` — presumably whether `\A` may match; confirm against callers.
 * @param allowG Anchor flag for `\G` — presumably whether `\G` may match; confirm against callers.
 */
public abstract compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule;
}
export interface ICompilePatternsResult {
@ -110,13 +112,26 @@ export class CaptureRule extends Rule {
/**
 * Not available on capture rules: always throws.
 *
 * @throws Error with the message 'Not supported!'.
 */
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
throw new Error('Not supported!');
}
/**
 * Not available on capture rules: always throws, regardless of the anchor flags.
 *
 * @throws Error with the message 'Not supported!'.
 */
public compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
throw new Error('Not supported!');
}
}
/**
 * The four precomputed variants of a regular expression source, one per
 * combination of the `allowA` / `allowG` flags. In a variant where a flag is
 * off (0), the letter of the corresponding `\A` / `\G` escape is replaced by
 * the character U+FFFF; where it is on (1), the escape is kept unchanged.
 */
interface IRegExpSourceAnchorCache {
// `\A` and `\G` both replaced.
readonly A0_G0: string;
// `\A` replaced, `\G` kept.
readonly A0_G1: string;
// `\A` kept, `\G` replaced.
readonly A1_G0: string;
// `\A` and `\G` both kept.
readonly A1_G1: string;
}
export class RegExpSource {
public source: string;
public readonly ruleId: number;
public hasAnchor: boolean;
public readonly hasBackReferences: boolean;
private _anchorCache: IRegExpSourceAnchorCache | null;
constructor(regExpSource: string, ruleId: number, handleAnchors: boolean = true) {
if (handleAnchors) {
@ -125,6 +140,7 @@ export class RegExpSource {
let lastPushedPos = 0;
let output: string[] = [];
let hasAnchor = false;
for (let pos = 0; pos < len; pos++) {
const ch = regExpSource.charAt(pos);
@ -135,12 +151,15 @@ export class RegExpSource {
output.push(regExpSource.substring(lastPushedPos, pos));
output.push('$(?!\\n)(?<!\\n)');
lastPushedPos = pos + 2;
} else if (nextCh === 'A' || nextCh === 'G') {
hasAnchor = true;
}
pos++;
}
}
}
this.hasAnchor = hasAnchor;
if (lastPushedPos === 0) {
// No \z hit
this.source = regExpSource;
@ -149,12 +168,20 @@ export class RegExpSource {
this.source = output.join('');
}
} else {
this.hasAnchor = false;
this.source = regExpSource;
}
} else {
this.hasAnchor = false;
this.source = regExpSource;
}
if (this.hasAnchor) {
this._anchorCache = this._buildAnchorCache();
} else {
this._anchorCache = null;
}
this.ruleId = ruleId;
this.hasBackReferences = HAS_BACK_REFERENCES.test(this.source);
@ -170,6 +197,10 @@ export class RegExpSource {
return;
}
this.source = newSource;
if (this.hasAnchor) {
this._anchorCache = this._buildAnchorCache();
}
}
public resolveBackReferences(lineText: string, captureIndices: IOnigCaptureIndex[]): string {
@ -181,16 +212,102 @@ export class RegExpSource {
return escapeRegExpCharacters(capturedValues[parseInt(g1, 10)] || '');
});
}
/**
 * Builds the four anchor-resolved variants of `this.source`.
 *
 * The source is walked one character at a time. A backslash consumes the
 * character that follows it: for `\A` and `\G` that character is either kept
 * or replaced by U+FFFF depending on the variant, and any other escaped
 * character is copied unchanged. A trailing lone backslash is copied as is.
 *
 * @returns The source rewritten for every combination of the A/G flags.
 */
private _buildAnchorCache(): IRegExpSourceAnchorCache {
    const text = this.source;
    const total = text.length;
    // One output buffer per (allowA, allowG) combination.
    const noA_noG: string[] = [];
    const noA_withG: string[] = [];
    const withA_noG: string[] = [];
    const withA_withG: string[] = [];
    const emit = (forA0G0: string, forA0G1: string, forA1G0: string, forA1G1: string): void => {
        noA_noG.push(forA0G0);
        noA_withG.push(forA0G1);
        withA_noG.push(forA1G0);
        withA_withG.push(forA1G1);
    };

    let index = 0;
    while (index < total) {
        const current = text.charAt(index);
        emit(current, current, current, current);
        index++;
        if (current !== '\\' || index >= total) {
            continue;
        }
        // `current` starts an escape: handle the escaped character right away
        // so it is never mistaken for the start of another escape.
        const escaped = text.charAt(index);
        switch (escaped) {
            case 'A':
                emit('\uFFFF', '\uFFFF', 'A', 'A');
                break;
            case 'G':
                emit('\uFFFF', 'G', '\uFFFF', 'G');
                break;
            default:
                emit(escaped, escaped, escaped, escaped);
                break;
        }
        index++;
    }

    return {
        A0_G0: noA_noG.join(''),
        A0_G1: noA_withG.join(''),
        A1_G0: withA_noG.join(''),
        A1_G1: withA_withG.join('')
    };
}
/**
 * Returns the source variant matching the given anchor flags.
 *
 * @param allowA Selects the variants in which `\A` is kept.
 * @param allowG Selects the variants in which `\G` is kept.
 * @returns The cached variant, or the plain source when this expression has
 * no anchors or no cache has been built.
 */
public resolveAnchors(allowA: boolean, allowG: boolean): string {
    const cache = this._anchorCache;
    if (!this.hasAnchor || !cache) {
        return this.source;
    }
    if (allowA) {
        return allowG ? cache.A1_G1 : cache.A1_G0;
    }
    return allowG ? cache.A0_G1 : cache.A0_G0;
}
}
/**
 * Compiled scanners for a list of regular expression sources, one slot per
 * combination of the `allowA` / `allowG` flags. A slot is `null` until the
 * scanner for that combination has been created.
 */
interface IRegExpSourceListAnchorCache {
// allowA = false, allowG = false
A0_G0: CompiledRule | null;
// allowA = false, allowG = true
A0_G1: CompiledRule | null;
// allowA = true, allowG = false
A1_G0: CompiledRule | null;
// allowA = true, allowG = true
A1_G1: CompiledRule | null;
}
export class RegExpSourceList {
private readonly _items: RegExpSource[];
private _hasAnchors: boolean;
private _cached: CompiledRule | null;
private _anchorCache: IRegExpSourceListAnchorCache;
/**
 * Creates an empty list: no items, no anchors seen, and nothing compiled yet
 * (neither the plain scanner nor any of the four anchor-specific scanners).
 */
constructor() {
    const emptyAnchorCache: IRegExpSourceListAnchorCache = {
        A0_G0: null,
        A0_G1: null,
        A1_G0: null,
        A1_G1: null
    };
    this._items = [];
    this._hasAnchors = false;
    this._cached = null;
    this._anchorCache = emptyAnchorCache;
}
public dispose(): void {
@ -202,14 +319,32 @@ export class RegExpSourceList {
this._cached.dispose();
this._cached = null;
}
if (this._anchorCache.A0_G0) {
this._anchorCache.A0_G0.dispose();
this._anchorCache.A0_G0 = null;
}
if (this._anchorCache.A0_G1) {
this._anchorCache.A0_G1.dispose();
this._anchorCache.A0_G1 = null;
}
if (this._anchorCache.A1_G0) {
this._anchorCache.A1_G0.dispose();
this._anchorCache.A1_G0 = null;
}
if (this._anchorCache.A1_G1) {
this._anchorCache.A1_G1.dispose();
this._anchorCache.A1_G1 = null;
}
}
/**
 * Appends `item` to the list and remembers whether any item seen so far
 * contains an anchor.
 */
public push(item: RegExpSource): void {
    this._items.push(item);
    if (item.hasAnchor) {
        this._hasAnchors = true;
    }
}
/**
 * Inserts `item` at the front of the list and remembers whether any item
 * seen so far contains an anchor.
 */
public unshift(item: RegExpSource): void {
    this._items.unshift(item);
    if (item.hasAnchor) {
        this._hasAnchors = true;
    }
}
public length(): number {
@ -231,6 +366,43 @@ export class RegExpSourceList {
}
return this._cached;
}
/**
 * Returns the compiled scanner for the given anchor flags.
 *
 * When no item in the list has an anchor, the flags are irrelevant and the
 * result of `compile` is returned. Otherwise the scanner for this flag
 * combination is created on first use and reused afterwards.
 *
 * @param onigLib The oniguruma library used to create scanners.
 * @param allowA Anchor flag selecting the `A1_*` (true) or `A0_*` (false) slots.
 * @param allowG Anchor flag selecting the `*_G1` (true) or `*_G0` (false) slots.
 */
public compileAG(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
    if (!this._hasAnchors) {
        return this.compile(onigLib);
    }
    const slot: keyof IRegExpSourceListAnchorCache = allowA
        ? (allowG ? 'A1_G1' : 'A1_G0')
        : (allowG ? 'A0_G1' : 'A0_G0');
    let compiled = this._anchorCache[slot];
    if (!compiled) {
        compiled = this._resolveAnchors(onigLib, allowA, allowG);
        this._anchorCache[slot] = compiled;
    }
    return compiled;
}
/**
 * Creates a new `CompiledRule` from every item's source resolved for the
 * given anchor flags, paired with the items' rule ids in the same order.
 */
private _resolveAnchors(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
    const sources: string[] = [];
    const ruleIds: number[] = [];
    for (let i = 0; i < this._items.length; i++) {
        const item = this._items[i];
        sources.push(item.resolveAnchors(allowA, allowG));
        ruleIds.push(item.ruleId);
    }
    return new CompiledRule(onigLib, sources, ruleIds);
}
}
export class MatchRule extends Rule {
@ -261,11 +433,19 @@ export class MatchRule extends Rule {
}
/**
 * Compiles this rule's collected patterns into a scanner.
 * `endRegexSource` is not used by this rule.
 */
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compile(grammar);
}
/**
 * Compiles this rule's collected patterns into a scanner for the given
 * anchor flags. `endRegexSource` is not used by this rule.
 */
public compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compileAG(grammar, allowA, allowG);
}
/**
 * Returns the list of pattern sources for this rule, collecting it on first use.
 *
 * The list itself is returned (not a compiled scanner) so that callers can
 * choose between `compile` and `compileAG` on it.
 */
private _getCachedCompiledPatterns(grammar: IRuleRegistry & IOnigLib): RegExpSourceList {
    if (!this._cachedCompiledPatterns) {
        this._cachedCompiledPatterns = new RegExpSourceList();
        this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
    }
    // A leftover `return this._cachedCompiledPatterns.compile(grammar);` used to
    // sit here; it returned a CompiledRule despite the declared return type and
    // made the intended return unreachable.
    return this._cachedCompiledPatterns;
}
}
@ -300,11 +480,19 @@ export class IncludeOnlyRule extends Rule {
}
/**
 * Compiles this rule's collected patterns into a scanner.
 * `endRegexSource` is not used by this rule.
 */
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compile(grammar);
}
/**
 * Compiles this rule's collected patterns into a scanner for the given
 * anchor flags. `endRegexSource` is not used by this rule.
 */
public compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compileAG(grammar, allowA, allowG);
}
/**
 * Returns the list of pattern sources for this rule, collecting it on first use.
 *
 * The list itself is returned (not a compiled scanner) so that callers can
 * choose between `compile` and `compileAG` on it.
 */
private _getCachedCompiledPatterns(grammar: IRuleRegistry & IOnigLib): RegExpSourceList {
    if (!this._cachedCompiledPatterns) {
        this._cachedCompiledPatterns = new RegExpSourceList();
        this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
    }
    // A leftover `return this._cachedCompiledPatterns.compile(grammar);` used to
    // sit here; it returned a CompiledRule despite the declared return type and
    // made the intended return unreachable.
    return this._cachedCompiledPatterns;
}
}
@ -371,6 +559,14 @@ export class BeginEndRule extends Rule {
}
/**
 * Compiles this rule's patterns, prepared for the given end pattern source,
 * into a scanner.
 */
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar, endRegexSource);
    return patterns.compile(grammar);
}
/**
 * Compiles this rule's patterns, prepared for the given end pattern source,
 * into a scanner for the given anchor flags.
 */
public compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar, endRegexSource);
    return patterns.compileAG(grammar, allowA, allowG);
}
private _getCachedCompiledPatterns(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): RegExpSourceList {
if (!this._cachedCompiledPatterns) {
this._cachedCompiledPatterns = new RegExpSourceList();
@ -389,7 +585,7 @@ export class BeginEndRule extends Rule {
this._cachedCompiledPatterns.setSource(0, endRegexSource);
}
}
return this._cachedCompiledPatterns.compile(grammar);
return this._cachedCompiledPatterns;
}
}
@ -456,14 +652,30 @@ export class BeginWhileRule extends Rule {
}
/**
 * Compiles this rule's collected patterns into a scanner.
 * `endRegexSource` is not used here.
 */
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compile(grammar);
}
/**
 * Compiles this rule's collected patterns into a scanner for the given
 * anchor flags. `endRegexSource` is not used here.
 */
public compileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
    const patterns = this._getCachedCompiledPatterns(grammar);
    return patterns.compileAG(grammar, allowA, allowG);
}
/**
 * Returns the list of pattern sources for this rule, collecting it on first use.
 *
 * The list itself is returned (not a compiled scanner) so that callers can
 * choose between `compile` and `compileAG` on it.
 */
private _getCachedCompiledPatterns(grammar: IRuleRegistry & IOnigLib): RegExpSourceList {
    if (!this._cachedCompiledPatterns) {
        this._cachedCompiledPatterns = new RegExpSourceList();
        this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
    }
    // A leftover `return this._cachedCompiledPatterns.compile(grammar);` used to
    // sit here; it returned a CompiledRule despite the declared return type and
    // made the intended return unreachable.
    return this._cachedCompiledPatterns;
}
/**
 * Compiles this rule's `while` pattern, prepared for the given pattern
 * source, into a scanner.
 */
public compileWhile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule {
    const whilePatterns = this._getCachedCompiledWhilePatterns(grammar, endRegexSource);
    return whilePatterns.compile(grammar);
}
/**
 * Compiles this rule's `while` pattern, prepared for the given pattern
 * source, into a scanner for the given anchor flags.
 */
public compileWhileAG(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule {
    const whilePatterns = this._getCachedCompiledWhilePatterns(grammar, endRegexSource);
    return whilePatterns.compileAG(grammar, allowA, allowG);
}
private _getCachedCompiledWhilePatterns(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): RegExpSourceList {
if (!this._cachedCompiledWhilePatterns) {
this._cachedCompiledWhilePatterns = new RegExpSourceList();
this._cachedCompiledWhilePatterns.push(this._while.hasBackReferences ? this._while.clone() : this._while);
@ -471,7 +683,7 @@ export class BeginWhileRule extends Rule {
if (this._while.hasBackReferences) {
this._cachedCompiledWhilePatterns.setSource(0, endRegexSource ? endRegexSource : '\uFFFF');
}
return this._cachedCompiledWhilePatterns.compile(grammar);
return this._cachedCompiledWhilePatterns;
}
}