Adopt latest `vscode-oniguruma` and take advantage of new oniguruma search options

This commit is contained in:
Alexandru Dima 2020-10-07 22:19:21 +00:00 коммит произвёл GitHub
Родитель 55a0d61ebe
Коммит 1aec0873bc
3 изменённых файлов: 59 добавлений и 187 удалений

Просмотреть файл

@ -3,7 +3,7 @@
*--------------------------------------------------------*/
import { clone, mergeObjects } from './utils';
import { IRawGrammar, IRawRepository, IRawRule, IOnigLib, IOnigCaptureIndex, OnigString, OnigScanner } from './types';
import { IRawGrammar, IRawRepository, IRawRule, IOnigLib, IOnigCaptureIndex, OnigString, OnigScanner, FindOption } from './types';
import { IRuleRegistry, IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, BeginWhileRule, MatchRule, CompiledRule } from './rule';
import { createMatchers, Matcher } from './matcher';
import { MetadataConsts, IGrammar, ITokenizeLineResult, ITokenizeLineResult2, IToken, IEmbeddedLanguagesMap, StandardTokenType, StackElement as StackElementDef, ITokenTypeMap } from './main';
@ -653,6 +653,17 @@ function debugCompiledRuleToString(ruleScanner: CompiledRule): string {
return r.join('\n');
}
function getFindOptions(allowA: boolean, allowG: boolean): number {
let options = FindOption.None;
if (!allowA) {
options |= FindOption.NotBeginString;
}
if (!allowG) {
options |= FindOption.NotBeginPosition;
}
return options;
}
function matchInjections(injections: Injection[], grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchInjectionsResult | null {
// The lower the better
let bestMatchRating = Number.MAX_VALUE;
@ -668,8 +679,9 @@ function matchInjections(injections: Injection[], grammar: Grammar, lineText: On
// injection selector doesn't match stack
continue;
}
const ruleScanner = grammar.getRule(injection.ruleId).compile(grammar, null, isFirstLine, linePos === anchorPosition);
const matchResult = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
const ruleScanner = grammar.getRule(injection.ruleId).compile(grammar, null);
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
const matchResult = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
if (DebugFlags.InDebugMode) {
console.log(' scanning for injections');
console.log(debugCompiledRuleToString(ruleScanner));
@ -714,14 +726,15 @@ interface IMatchResult {
function matchRule(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchResult | null {
const rule = stack.getRule(grammar);
const ruleScanner = rule.compile(grammar, stack.endRule, isFirstLine, linePos === anchorPosition);
const ruleScanner = rule.compile(grammar, stack.endRule);
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
let perfStart = 0;
if (DebugFlags.InDebugMode) {
perfStart = performanceNow();
}
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
if (DebugFlags.InDebugMode) {
const elapsedMillis = performanceNow() - perfStart;
@ -808,8 +821,9 @@ function _checkWhileConditions(grammar: Grammar, lineText: OnigString, isFirstLi
}
for (let whileRule = whileRules.pop(); whileRule; whileRule = whileRules.pop()) {
const ruleScanner = whileRule.rule.compileWhile(grammar, whileRule.stack.endRule, isFirstLine, anchorPosition === linePos);
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
const ruleScanner = whileRule.rule.compileWhile(grammar, whileRule.stack.endRule);
const findOptions = getFindOptions(isFirstLine, anchorPosition === linePos);
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
if (DebugFlags.InDebugMode) {
console.log(' scanning for while rule');
console.log(debugCompiledRuleToString(ruleScanner));

Просмотреть файл

@ -82,7 +82,7 @@ export abstract class Rule {
public abstract collectPatternsRecursive(grammar: IRuleRegistry, out: RegExpSourceList, isFirst: boolean): void;
public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule;
public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule;
}
export interface ICompilePatternsResult {
@ -107,25 +107,16 @@ export class CaptureRule extends Rule {
throw new Error('Not supported!');
}
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
throw new Error('Not supported!');
}
}
interface IRegExpSourceAnchorCache {
readonly A0_G0: string;
readonly A0_G1: string;
readonly A1_G0: string;
readonly A1_G1: string;
}
export class RegExpSource {
public source: string;
public readonly ruleId: number;
public hasAnchor: boolean;
public readonly hasBackReferences: boolean;
private _anchorCache: IRegExpSourceAnchorCache | null;
constructor(regExpSource: string, ruleId: number, handleAnchors: boolean = true) {
if (handleAnchors) {
@ -134,7 +125,6 @@ export class RegExpSource {
let lastPushedPos = 0;
let output: string[] = [];
let hasAnchor = false;
for (let pos = 0; pos < len; pos++) {
const ch = regExpSource.charAt(pos);
@ -145,15 +135,12 @@ export class RegExpSource {
output.push(regExpSource.substring(lastPushedPos, pos));
output.push('$(?!\\n)(?<!\\n)');
lastPushedPos = pos + 2;
} else if (nextCh === 'A' || nextCh === 'G') {
hasAnchor = true;
}
pos++;
}
}
}
this.hasAnchor = hasAnchor;
if (lastPushedPos === 0) {
// No \z hit
this.source = regExpSource;
@ -162,20 +149,12 @@ export class RegExpSource {
this.source = output.join('');
}
} else {
this.hasAnchor = false;
this.source = regExpSource;
}
} else {
this.hasAnchor = false;
this.source = regExpSource;
}
if (this.hasAnchor) {
this._anchorCache = this._buildAnchorCache();
} else {
this._anchorCache = null;
}
this.ruleId = ruleId;
this.hasBackReferences = HAS_BACK_REFERENCES.test(this.source);
@ -191,10 +170,6 @@ export class RegExpSource {
return;
}
this.source = newSource;
if (this.hasAnchor) {
this._anchorCache = this._buildAnchorCache();
}
}
public resolveBackReferences(lineText: string, captureIndices: IOnigCaptureIndex[]): string {
@ -206,102 +181,16 @@ export class RegExpSource {
return escapeRegExpCharacters(capturedValues[parseInt(g1, 10)] || '');
});
}
private _buildAnchorCache(): IRegExpSourceAnchorCache {
let A0_G0_result: string[] = [];
let A0_G1_result: string[] = [];
let A1_G0_result: string[] = [];
let A1_G1_result: string[] = [];
let pos: number,
len: number,
ch: string,
nextCh: string;
for (pos = 0, len = this.source.length; pos < len; pos++) {
ch = this.source.charAt(pos);
A0_G0_result[pos] = ch;
A0_G1_result[pos] = ch;
A1_G0_result[pos] = ch;
A1_G1_result[pos] = ch;
if (ch === '\\') {
if (pos + 1 < len) {
nextCh = this.source.charAt(pos + 1);
if (nextCh === 'A') {
A0_G0_result[pos + 1] = '\uFFFF';
A0_G1_result[pos + 1] = '\uFFFF';
A1_G0_result[pos + 1] = 'A';
A1_G1_result[pos + 1] = 'A';
} else if (nextCh === 'G') {
A0_G0_result[pos + 1] = '\uFFFF';
A0_G1_result[pos + 1] = 'G';
A1_G0_result[pos + 1] = '\uFFFF';
A1_G1_result[pos + 1] = 'G';
} else {
A0_G0_result[pos + 1] = nextCh;
A0_G1_result[pos + 1] = nextCh;
A1_G0_result[pos + 1] = nextCh;
A1_G1_result[pos + 1] = nextCh;
}
pos++;
}
}
}
return {
A0_G0: A0_G0_result.join(''),
A0_G1: A0_G1_result.join(''),
A1_G0: A1_G0_result.join(''),
A1_G1: A1_G1_result.join('')
};
}
public resolveAnchors(allowA: boolean, allowG: boolean): string {
if (!this.hasAnchor || !this._anchorCache) {
return this.source;
}
if (allowA) {
if (allowG) {
return this._anchorCache.A1_G1;
} else {
return this._anchorCache.A1_G0;
}
} else {
if (allowG) {
return this._anchorCache.A0_G1;
} else {
return this._anchorCache.A0_G0;
}
}
}
}
interface IRegExpSourceListAnchorCache {
A0_G0: CompiledRule | null;
A0_G1: CompiledRule | null;
A1_G0: CompiledRule | null;
A1_G1: CompiledRule | null;
}
export class RegExpSourceList {
private readonly _items: RegExpSource[];
private _hasAnchors: boolean;
private _cached: CompiledRule | null;
private _anchorCache: IRegExpSourceListAnchorCache;
constructor() {
this._items = [];
this._hasAnchors = false;
this._cached = null;
this._anchorCache = {
A0_G0: null,
A0_G1: null,
A1_G0: null,
A1_G1: null
};
}
public dispose(): void {
@ -313,32 +202,14 @@ export class RegExpSourceList {
this._cached.dispose();
this._cached = null;
}
if (this._anchorCache.A0_G0) {
this._anchorCache.A0_G0.dispose();
this._anchorCache.A0_G0 = null;
}
if (this._anchorCache.A0_G1) {
this._anchorCache.A0_G1.dispose();
this._anchorCache.A0_G1 = null;
}
if (this._anchorCache.A1_G0) {
this._anchorCache.A1_G0.dispose();
this._anchorCache.A1_G0 = null;
}
if (this._anchorCache.A1_G1) {
this._anchorCache.A1_G1.dispose();
this._anchorCache.A1_G1 = null;
}
}
public push(item: RegExpSource): void {
this._items.push(item);
this._hasAnchors = this._hasAnchors || item.hasAnchor;
}
public unshift(item: RegExpSource): void {
this._items.unshift(item);
this._hasAnchors = this._hasAnchors || item.hasAnchor;
}
public length(): number {
@ -353,45 +224,12 @@ export class RegExpSourceList {
}
}
public compile(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
if (!this._hasAnchors) {
if (!this._cached) {
let regExps = this._items.map(e => e.source);
this._cached = new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
}
return this._cached;
} else {
if (allowA) {
if (allowG) {
if (!this._anchorCache.A1_G1) {
this._anchorCache.A1_G1 = this._resolveAnchors(onigLib, allowA, allowG);
}
return this._anchorCache.A1_G1;
} else {
if (!this._anchorCache.A1_G0) {
this._anchorCache.A1_G0 = this._resolveAnchors(onigLib, allowA, allowG);
}
return this._anchorCache.A1_G0;
}
} else {
if (allowG) {
if (!this._anchorCache.A0_G1) {
this._anchorCache.A0_G1 = this._resolveAnchors(onigLib, allowA, allowG);
}
return this._anchorCache.A0_G1;
} else {
if (!this._anchorCache.A0_G0) {
this._anchorCache.A0_G0 = this._resolveAnchors(onigLib, allowA, allowG);
}
return this._anchorCache.A0_G0;
}
}
public compile(onigLib: IOnigLib): CompiledRule {
if (!this._cached) {
let regExps = this._items.map(e => e.source);
this._cached = new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
}
}
private _resolveAnchors(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
let regExps = this._items.map(e => e.resolveAnchors(allowA, allowG));
return new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
return this._cached;
}
}
@ -422,12 +260,12 @@ export class MatchRule extends Rule {
out.push(this._match);
}
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
if (!this._cachedCompiledPatterns) {
this._cachedCompiledPatterns = new RegExpSourceList();
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
}
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
return this._cachedCompiledPatterns.compile(grammar);
}
}
@ -461,12 +299,12 @@ export class IncludeOnlyRule extends Rule {
}
}
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
if (!this._cachedCompiledPatterns) {
this._cachedCompiledPatterns = new RegExpSourceList();
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
}
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
return this._cachedCompiledPatterns.compile(grammar);
}
}
@ -532,7 +370,7 @@ export class BeginEndRule extends Rule {
}
}
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
if (!this._cachedCompiledPatterns) {
this._cachedCompiledPatterns = new RegExpSourceList();
@ -551,7 +389,7 @@ export class BeginEndRule extends Rule {
this._cachedCompiledPatterns.setSource(0, endRegexSource);
}
}
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
return this._cachedCompiledPatterns.compile(grammar);
}
}
@ -617,15 +455,15 @@ export class BeginWhileRule extends Rule {
}
}
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
if (!this._cachedCompiledPatterns) {
this._cachedCompiledPatterns = new RegExpSourceList();
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
}
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
return this._cachedCompiledPatterns.compile(grammar);
}
public compileWhile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule {
public compileWhile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule {
if (!this._cachedCompiledWhilePatterns) {
this._cachedCompiledWhilePatterns = new RegExpSourceList();
this._cachedCompiledWhilePatterns.push(this._while.hasBackReferences ? this._while.clone() : this._while);
@ -633,7 +471,7 @@ export class BeginWhileRule extends Rule {
if (this._while.hasBackReferences) {
this._cachedCompiledWhilePatterns.setSource(0, endRegexSource ? endRegexSource : '\uFFFF');
}
return this._cachedCompiledWhilePatterns.compile(grammar, allowA, allowG);
return this._cachedCompiledWhilePatterns.compile(grammar);
}
}

Просмотреть файл

@ -80,8 +80,28 @@ export interface IOnigMatch {
captureIndices: IOnigCaptureIndex[];
}
export const enum FindOption {
None = 0,
/**
* equivalent of ONIG_OPTION_NOT_BEGIN_STRING: (str) isn't considered as begin of string (* fail \A)
*/
NotBeginString = 1,
/**
* equivalent of ONIG_OPTION_NOT_END_STRING: (end) isn't considered as end of string (* fail \z, \Z)
*/
NotEndString = 2,
/**
* equivalent of ONIG_OPTION_NOT_BEGIN_POSITION: (start) isn't considered as start position of search (* fail \G)
*/
NotBeginPosition = 4,
/**
* used for debugging purposes.
*/
DebugCall = 8,
}
export interface OnigScanner {
findNextMatchSync(string: string | OnigString, startPosition: number): IOnigMatch;
findNextMatchSync(string: string | OnigString, startPosition: number, options: number): IOnigMatch;
dispose?(): void;
}