Adopt latest `vscode-oniguruma` and take advantage of new oniguruma search options
This commit is contained in:
Родитель
55a0d61ebe
Коммит
1aec0873bc
|
@ -3,7 +3,7 @@
|
|||
*--------------------------------------------------------*/
|
||||
|
||||
import { clone, mergeObjects } from './utils';
|
||||
import { IRawGrammar, IRawRepository, IRawRule, IOnigLib, IOnigCaptureIndex, OnigString, OnigScanner } from './types';
|
||||
import { IRawGrammar, IRawRepository, IRawRule, IOnigLib, IOnigCaptureIndex, OnigString, OnigScanner, FindOption } from './types';
|
||||
import { IRuleRegistry, IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, BeginWhileRule, MatchRule, CompiledRule } from './rule';
|
||||
import { createMatchers, Matcher } from './matcher';
|
||||
import { MetadataConsts, IGrammar, ITokenizeLineResult, ITokenizeLineResult2, IToken, IEmbeddedLanguagesMap, StandardTokenType, StackElement as StackElementDef, ITokenTypeMap } from './main';
|
||||
|
@ -653,6 +653,17 @@ function debugCompiledRuleToString(ruleScanner: CompiledRule): string {
|
|||
return r.join('\n');
|
||||
}
|
||||
|
||||
function getFindOptions(allowA: boolean, allowG: boolean): number {
|
||||
let options = FindOption.None;
|
||||
if (!allowA) {
|
||||
options |= FindOption.NotBeginString;
|
||||
}
|
||||
if (!allowG) {
|
||||
options |= FindOption.NotBeginPosition;
|
||||
}
|
||||
return options;
|
||||
}
|
||||
|
||||
function matchInjections(injections: Injection[], grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchInjectionsResult | null {
|
||||
// The lower the better
|
||||
let bestMatchRating = Number.MAX_VALUE;
|
||||
|
@ -668,8 +679,9 @@ function matchInjections(injections: Injection[], grammar: Grammar, lineText: On
|
|||
// injection selector doesn't match stack
|
||||
continue;
|
||||
}
|
||||
const ruleScanner = grammar.getRule(injection.ruleId).compile(grammar, null, isFirstLine, linePos === anchorPosition);
|
||||
const matchResult = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
|
||||
const ruleScanner = grammar.getRule(injection.ruleId).compile(grammar, null);
|
||||
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
|
||||
const matchResult = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
|
||||
if (DebugFlags.InDebugMode) {
|
||||
console.log(' scanning for injections');
|
||||
console.log(debugCompiledRuleToString(ruleScanner));
|
||||
|
@ -714,14 +726,15 @@ interface IMatchResult {
|
|||
|
||||
function matchRule(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement, anchorPosition: number): IMatchResult | null {
|
||||
const rule = stack.getRule(grammar);
|
||||
const ruleScanner = rule.compile(grammar, stack.endRule, isFirstLine, linePos === anchorPosition);
|
||||
const ruleScanner = rule.compile(grammar, stack.endRule);
|
||||
const findOptions = getFindOptions(isFirstLine, linePos === anchorPosition);
|
||||
|
||||
let perfStart = 0;
|
||||
if (DebugFlags.InDebugMode) {
|
||||
perfStart = performanceNow();
|
||||
}
|
||||
|
||||
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
|
||||
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
|
||||
|
||||
if (DebugFlags.InDebugMode) {
|
||||
const elapsedMillis = performanceNow() - perfStart;
|
||||
|
@ -808,8 +821,9 @@ function _checkWhileConditions(grammar: Grammar, lineText: OnigString, isFirstLi
|
|||
}
|
||||
|
||||
for (let whileRule = whileRules.pop(); whileRule; whileRule = whileRules.pop()) {
|
||||
const ruleScanner = whileRule.rule.compileWhile(grammar, whileRule.stack.endRule, isFirstLine, anchorPosition === linePos);
|
||||
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos);
|
||||
const ruleScanner = whileRule.rule.compileWhile(grammar, whileRule.stack.endRule);
|
||||
const findOptions = getFindOptions(isFirstLine, anchorPosition === linePos);
|
||||
const r = ruleScanner.scanner.findNextMatchSync(lineText, linePos, findOptions);
|
||||
if (DebugFlags.InDebugMode) {
|
||||
console.log(' scanning for while rule');
|
||||
console.log(debugCompiledRuleToString(ruleScanner));
|
||||
|
|
196
src/rule.ts
196
src/rule.ts
|
@ -82,7 +82,7 @@ export abstract class Rule {
|
|||
|
||||
public abstract collectPatternsRecursive(grammar: IRuleRegistry, out: RegExpSourceList, isFirst: boolean): void;
|
||||
|
||||
public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule;
|
||||
public abstract compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule;
|
||||
}
|
||||
|
||||
export interface ICompilePatternsResult {
|
||||
|
@ -107,25 +107,16 @@ export class CaptureRule extends Rule {
|
|||
throw new Error('Not supported!');
|
||||
}
|
||||
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
|
||||
throw new Error('Not supported!');
|
||||
}
|
||||
}
|
||||
|
||||
interface IRegExpSourceAnchorCache {
|
||||
readonly A0_G0: string;
|
||||
readonly A0_G1: string;
|
||||
readonly A1_G0: string;
|
||||
readonly A1_G1: string;
|
||||
}
|
||||
|
||||
export class RegExpSource {
|
||||
|
||||
public source: string;
|
||||
public readonly ruleId: number;
|
||||
public hasAnchor: boolean;
|
||||
public readonly hasBackReferences: boolean;
|
||||
private _anchorCache: IRegExpSourceAnchorCache | null;
|
||||
|
||||
constructor(regExpSource: string, ruleId: number, handleAnchors: boolean = true) {
|
||||
if (handleAnchors) {
|
||||
|
@ -134,7 +125,6 @@ export class RegExpSource {
|
|||
let lastPushedPos = 0;
|
||||
let output: string[] = [];
|
||||
|
||||
let hasAnchor = false;
|
||||
for (let pos = 0; pos < len; pos++) {
|
||||
const ch = regExpSource.charAt(pos);
|
||||
|
||||
|
@ -145,15 +135,12 @@ export class RegExpSource {
|
|||
output.push(regExpSource.substring(lastPushedPos, pos));
|
||||
output.push('$(?!\\n)(?<!\\n)');
|
||||
lastPushedPos = pos + 2;
|
||||
} else if (nextCh === 'A' || nextCh === 'G') {
|
||||
hasAnchor = true;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.hasAnchor = hasAnchor;
|
||||
if (lastPushedPos === 0) {
|
||||
// No \z hit
|
||||
this.source = regExpSource;
|
||||
|
@ -162,20 +149,12 @@ export class RegExpSource {
|
|||
this.source = output.join('');
|
||||
}
|
||||
} else {
|
||||
this.hasAnchor = false;
|
||||
this.source = regExpSource;
|
||||
}
|
||||
} else {
|
||||
this.hasAnchor = false;
|
||||
this.source = regExpSource;
|
||||
}
|
||||
|
||||
if (this.hasAnchor) {
|
||||
this._anchorCache = this._buildAnchorCache();
|
||||
} else {
|
||||
this._anchorCache = null;
|
||||
}
|
||||
|
||||
this.ruleId = ruleId;
|
||||
this.hasBackReferences = HAS_BACK_REFERENCES.test(this.source);
|
||||
|
||||
|
@ -191,10 +170,6 @@ export class RegExpSource {
|
|||
return;
|
||||
}
|
||||
this.source = newSource;
|
||||
|
||||
if (this.hasAnchor) {
|
||||
this._anchorCache = this._buildAnchorCache();
|
||||
}
|
||||
}
|
||||
|
||||
public resolveBackReferences(lineText: string, captureIndices: IOnigCaptureIndex[]): string {
|
||||
|
@ -206,102 +181,16 @@ export class RegExpSource {
|
|||
return escapeRegExpCharacters(capturedValues[parseInt(g1, 10)] || '');
|
||||
});
|
||||
}
|
||||
|
||||
private _buildAnchorCache(): IRegExpSourceAnchorCache {
|
||||
let A0_G0_result: string[] = [];
|
||||
let A0_G1_result: string[] = [];
|
||||
let A1_G0_result: string[] = [];
|
||||
let A1_G1_result: string[] = [];
|
||||
|
||||
let pos: number,
|
||||
len: number,
|
||||
ch: string,
|
||||
nextCh: string;
|
||||
|
||||
for (pos = 0, len = this.source.length; pos < len; pos++) {
|
||||
ch = this.source.charAt(pos);
|
||||
A0_G0_result[pos] = ch;
|
||||
A0_G1_result[pos] = ch;
|
||||
A1_G0_result[pos] = ch;
|
||||
A1_G1_result[pos] = ch;
|
||||
|
||||
if (ch === '\\') {
|
||||
if (pos + 1 < len) {
|
||||
nextCh = this.source.charAt(pos + 1);
|
||||
if (nextCh === 'A') {
|
||||
A0_G0_result[pos + 1] = '\uFFFF';
|
||||
A0_G1_result[pos + 1] = '\uFFFF';
|
||||
A1_G0_result[pos + 1] = 'A';
|
||||
A1_G1_result[pos + 1] = 'A';
|
||||
} else if (nextCh === 'G') {
|
||||
A0_G0_result[pos + 1] = '\uFFFF';
|
||||
A0_G1_result[pos + 1] = 'G';
|
||||
A1_G0_result[pos + 1] = '\uFFFF';
|
||||
A1_G1_result[pos + 1] = 'G';
|
||||
} else {
|
||||
A0_G0_result[pos + 1] = nextCh;
|
||||
A0_G1_result[pos + 1] = nextCh;
|
||||
A1_G0_result[pos + 1] = nextCh;
|
||||
A1_G1_result[pos + 1] = nextCh;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
A0_G0: A0_G0_result.join(''),
|
||||
A0_G1: A0_G1_result.join(''),
|
||||
A1_G0: A1_G0_result.join(''),
|
||||
A1_G1: A1_G1_result.join('')
|
||||
};
|
||||
}
|
||||
|
||||
public resolveAnchors(allowA: boolean, allowG: boolean): string {
|
||||
if (!this.hasAnchor || !this._anchorCache) {
|
||||
return this.source;
|
||||
}
|
||||
|
||||
if (allowA) {
|
||||
if (allowG) {
|
||||
return this._anchorCache.A1_G1;
|
||||
} else {
|
||||
return this._anchorCache.A1_G0;
|
||||
}
|
||||
} else {
|
||||
if (allowG) {
|
||||
return this._anchorCache.A0_G1;
|
||||
} else {
|
||||
return this._anchorCache.A0_G0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interface IRegExpSourceListAnchorCache {
|
||||
A0_G0: CompiledRule | null;
|
||||
A0_G1: CompiledRule | null;
|
||||
A1_G0: CompiledRule | null;
|
||||
A1_G1: CompiledRule | null;
|
||||
}
|
||||
|
||||
export class RegExpSourceList {
|
||||
|
||||
private readonly _items: RegExpSource[];
|
||||
private _hasAnchors: boolean;
|
||||
private _cached: CompiledRule | null;
|
||||
private _anchorCache: IRegExpSourceListAnchorCache;
|
||||
|
||||
constructor() {
|
||||
this._items = [];
|
||||
this._hasAnchors = false;
|
||||
this._cached = null;
|
||||
this._anchorCache = {
|
||||
A0_G0: null,
|
||||
A0_G1: null,
|
||||
A1_G0: null,
|
||||
A1_G1: null
|
||||
};
|
||||
}
|
||||
|
||||
public dispose(): void {
|
||||
|
@ -313,32 +202,14 @@ export class RegExpSourceList {
|
|||
this._cached.dispose();
|
||||
this._cached = null;
|
||||
}
|
||||
if (this._anchorCache.A0_G0) {
|
||||
this._anchorCache.A0_G0.dispose();
|
||||
this._anchorCache.A0_G0 = null;
|
||||
}
|
||||
if (this._anchorCache.A0_G1) {
|
||||
this._anchorCache.A0_G1.dispose();
|
||||
this._anchorCache.A0_G1 = null;
|
||||
}
|
||||
if (this._anchorCache.A1_G0) {
|
||||
this._anchorCache.A1_G0.dispose();
|
||||
this._anchorCache.A1_G0 = null;
|
||||
}
|
||||
if (this._anchorCache.A1_G1) {
|
||||
this._anchorCache.A1_G1.dispose();
|
||||
this._anchorCache.A1_G1 = null;
|
||||
}
|
||||
}
|
||||
|
||||
public push(item: RegExpSource): void {
|
||||
this._items.push(item);
|
||||
this._hasAnchors = this._hasAnchors || item.hasAnchor;
|
||||
}
|
||||
|
||||
public unshift(item: RegExpSource): void {
|
||||
this._items.unshift(item);
|
||||
this._hasAnchors = this._hasAnchors || item.hasAnchor;
|
||||
}
|
||||
|
||||
public length(): number {
|
||||
|
@ -353,45 +224,12 @@ export class RegExpSourceList {
|
|||
}
|
||||
}
|
||||
|
||||
public compile(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
if (!this._hasAnchors) {
|
||||
if (!this._cached) {
|
||||
let regExps = this._items.map(e => e.source);
|
||||
this._cached = new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
|
||||
}
|
||||
return this._cached;
|
||||
} else {
|
||||
if (allowA) {
|
||||
if (allowG) {
|
||||
if (!this._anchorCache.A1_G1) {
|
||||
this._anchorCache.A1_G1 = this._resolveAnchors(onigLib, allowA, allowG);
|
||||
}
|
||||
return this._anchorCache.A1_G1;
|
||||
} else {
|
||||
if (!this._anchorCache.A1_G0) {
|
||||
this._anchorCache.A1_G0 = this._resolveAnchors(onigLib, allowA, allowG);
|
||||
}
|
||||
return this._anchorCache.A1_G0;
|
||||
}
|
||||
} else {
|
||||
if (allowG) {
|
||||
if (!this._anchorCache.A0_G1) {
|
||||
this._anchorCache.A0_G1 = this._resolveAnchors(onigLib, allowA, allowG);
|
||||
}
|
||||
return this._anchorCache.A0_G1;
|
||||
} else {
|
||||
if (!this._anchorCache.A0_G0) {
|
||||
this._anchorCache.A0_G0 = this._resolveAnchors(onigLib, allowA, allowG);
|
||||
}
|
||||
return this._anchorCache.A0_G0;
|
||||
}
|
||||
}
|
||||
public compile(onigLib: IOnigLib): CompiledRule {
|
||||
if (!this._cached) {
|
||||
let regExps = this._items.map(e => e.source);
|
||||
this._cached = new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
|
||||
}
|
||||
}
|
||||
|
||||
private _resolveAnchors(onigLib: IOnigLib, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
let regExps = this._items.map(e => e.resolveAnchors(allowA, allowG));
|
||||
return new CompiledRule(onigLib, regExps, this._items.map(e => e.ruleId));
|
||||
return this._cached;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -422,12 +260,12 @@ export class MatchRule extends Rule {
|
|||
out.push(this._match);
|
||||
}
|
||||
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
|
||||
if (!this._cachedCompiledPatterns) {
|
||||
this._cachedCompiledPatterns = new RegExpSourceList();
|
||||
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
|
||||
}
|
||||
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
|
||||
return this._cachedCompiledPatterns.compile(grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -461,12 +299,12 @@ export class IncludeOnlyRule extends Rule {
|
|||
}
|
||||
}
|
||||
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
|
||||
if (!this._cachedCompiledPatterns) {
|
||||
this._cachedCompiledPatterns = new RegExpSourceList();
|
||||
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
|
||||
}
|
||||
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
|
||||
return this._cachedCompiledPatterns.compile(grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -532,7 +370,7 @@ export class BeginEndRule extends Rule {
|
|||
}
|
||||
}
|
||||
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
|
||||
if (!this._cachedCompiledPatterns) {
|
||||
this._cachedCompiledPatterns = new RegExpSourceList();
|
||||
|
||||
|
@ -551,7 +389,7 @@ export class BeginEndRule extends Rule {
|
|||
this._cachedCompiledPatterns.setSource(0, endRegexSource);
|
||||
}
|
||||
}
|
||||
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
|
||||
return this._cachedCompiledPatterns.compile(grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -617,15 +455,15 @@ export class BeginWhileRule extends Rule {
|
|||
}
|
||||
}
|
||||
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string): CompiledRule {
|
||||
if (!this._cachedCompiledPatterns) {
|
||||
this._cachedCompiledPatterns = new RegExpSourceList();
|
||||
this.collectPatternsRecursive(grammar, this._cachedCompiledPatterns, true);
|
||||
}
|
||||
return this._cachedCompiledPatterns.compile(grammar, allowA, allowG);
|
||||
return this._cachedCompiledPatterns.compile(grammar);
|
||||
}
|
||||
|
||||
public compileWhile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null, allowA: boolean, allowG: boolean): CompiledRule {
|
||||
public compileWhile(grammar: IRuleRegistry & IOnigLib, endRegexSource: string | null): CompiledRule {
|
||||
if (!this._cachedCompiledWhilePatterns) {
|
||||
this._cachedCompiledWhilePatterns = new RegExpSourceList();
|
||||
this._cachedCompiledWhilePatterns.push(this._while.hasBackReferences ? this._while.clone() : this._while);
|
||||
|
@ -633,7 +471,7 @@ export class BeginWhileRule extends Rule {
|
|||
if (this._while.hasBackReferences) {
|
||||
this._cachedCompiledWhilePatterns.setSource(0, endRegexSource ? endRegexSource : '\uFFFF');
|
||||
}
|
||||
return this._cachedCompiledWhilePatterns.compile(grammar, allowA, allowG);
|
||||
return this._cachedCompiledWhilePatterns.compile(grammar);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
22
src/types.ts
22
src/types.ts
|
@ -80,8 +80,28 @@ export interface IOnigMatch {
|
|||
captureIndices: IOnigCaptureIndex[];
|
||||
}
|
||||
|
||||
export const enum FindOption {
|
||||
None = 0,
|
||||
/**
|
||||
* equivalent of ONIG_OPTION_NOT_BEGIN_STRING: (str) isn't considered as begin of string (* fail \A)
|
||||
*/
|
||||
NotBeginString = 1,
|
||||
/**
|
||||
* equivalent of ONIG_OPTION_NOT_END_STRING: (end) isn't considered as end of string (* fail \z, \Z)
|
||||
*/
|
||||
NotEndString = 2,
|
||||
/**
|
||||
* equivalent of ONIG_OPTION_NOT_BEGIN_POSITION: (start) isn't considered as start position of search (* fail \G)
|
||||
*/
|
||||
NotBeginPosition = 4,
|
||||
/**
|
||||
* used for debugging purposes.
|
||||
*/
|
||||
DebugCall = 8,
|
||||
}
|
||||
|
||||
export interface OnigScanner {
|
||||
findNextMatchSync(string: string | OnigString, startPosition: number): IOnigMatch;
|
||||
findNextMatchSync(string: string | OnigString, startPosition: number, options: number): IOnigMatch;
|
||||
dispose?(): void;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче