Adopt oniguruma with OnigString
This commit is contained in:
Родитель
e08a39f73e
Коммит
6be0ea8e60
|
@ -25,7 +25,9 @@ function tokenizeFile(filePath, grammar, message) {
|
|||
tokenize(grammar, content.split(/\r\n|\r|\n/));
|
||||
}
|
||||
|
||||
tokenizeFile(path.join(__dirname, 'large.js'), jsGrammar, 'Tokenizing jQuery v2.0.3')
|
||||
tokenizeFile(path.join(__dirname, 'large.min.js'), jsGrammar, 'Tokenizing jQuery v2.0.3 minified')
|
||||
tokenizeFile(path.join(__dirname, 'bootstrap.css'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1')
|
||||
tokenizeFile(path.join(__dirname, 'bootstrap.min.css'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1 minified')
|
||||
tokenizeFile(path.join(__dirname, 'main.08642f99.css.txt'), cssGrammar, 'Tokenizing Bootstrap with multi-byte')
|
||||
|
||||
tokenizeFile(path.join(__dirname, 'large.js.txt'), jsGrammar, 'Tokenizing jQuery v2.0.3')
|
||||
tokenizeFile(path.join(__dirname, 'large.min.js.txt'), jsGrammar, 'Tokenizing jQuery v2.0.3 minified')
|
||||
tokenizeFile(path.join(__dirname, 'bootstrap.css.txt'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1')
|
||||
tokenizeFile(path.join(__dirname, 'bootstrap.min.css.txt'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1 minified')
|
||||
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "vscode-textmate",
|
||||
"version": "1.0.8",
|
||||
"version": "1.0.9",
|
||||
"description": "VSCode TextMate grammar helpers",
|
||||
"author": {
|
||||
"name": "Microsoft Corporation"
|
||||
|
@ -20,7 +20,7 @@
|
|||
"benchmark": "node benchmark/benchmark.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"oniguruma": "^5.1.1",
|
||||
"alexandrudima-oniguruma": "^6.0.0",
|
||||
"sax": "^1.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
|
|
@ -558,15 +558,46 @@ var RegExpSource = (function () {
|
|||
return RegExpSource;
|
||||
})();
|
||||
exports.RegExpSource = RegExpSource;
|
||||
var createOnigScanner = (function () {
|
||||
var USE_NEW_ONE = true;
|
||||
var getOnigModule = (function () {
|
||||
var onigurumaModule = null;
|
||||
return function createOnigScanner(sources) {
|
||||
return function () {
|
||||
if (!onigurumaModule) {
|
||||
if (USE_NEW_ONE) {
|
||||
onigurumaModule = require('alexandrudima-oniguruma');
|
||||
}
|
||||
else {
|
||||
onigurumaModule = require('oniguruma');
|
||||
}
|
||||
return new onigurumaModule.OnigScanner(sources);
|
||||
}
|
||||
return onigurumaModule;
|
||||
};
|
||||
})();
|
||||
/**
 * Creates a scanner over the given regular expression sources.
 * The OnigScanner constructor is taken from the module object returned by
 * getOnigModule().
 *
 * @param sources the regular expression source strings handed to the scanner
 * @returns a new OnigScanner instance
 */
function createOnigScanner(sources) {
|
||||
var onigurumaModule = getOnigModule();
|
||||
return new onigurumaModule.OnigScanner(sources);
|
||||
}
|
||||
/**
 * Prepares a piece of text for scanning.
 *
 * When USE_NEW_ONE is truthy, the text is wrapped in an OnigString obtained
 * from getOnigModule(), and the original string is attached to the wrapper
 * as `$str`. Otherwise the text is returned unchanged.
 *
 * @param sources the text to wrap (callers in this file pass a line of text)
 * @returns the OnigString wrapper, or `sources` itself when USE_NEW_ONE is falsy
 */
function createOnigString(sources) {
|
||||
if (USE_NEW_ONE) {
|
||||
var onigurumaModule = getOnigModule();
|
||||
var r = new onigurumaModule.OnigString(sources);
|
||||
// Keep the original string on the wrapper; getString() reads it back from `$str`.
r.$str = sources;
|
||||
return r;
|
||||
}
|
||||
else {
|
||||
return sources;
|
||||
}
|
||||
}
|
||||
exports.createOnigString = createOnigString;
|
||||
/**
 * Returns the plain string behind a value produced by createOnigString().
 *
 * When USE_NEW_ONE is truthy this reads the `$str` property that
 * createOnigString() attaches; otherwise the value is returned as-is.
 *
 * @param str a value previously returned by createOnigString()
 * @returns the underlying string
 */
function getString(str) {
|
||||
if (USE_NEW_ONE) {
|
||||
// NOTE: a value that did not come from createOnigString() has no `$str`,
// so this evaluates to undefined for it.
return str.$str;
|
||||
}
|
||||
else {
|
||||
return str;
|
||||
}
|
||||
}
|
||||
exports.getString = getString;
|
||||
var RegExpSourceList = (function () {
|
||||
function RegExpSourceList() {
|
||||
this._items = [];
|
||||
|
@ -1046,9 +1077,10 @@ var Grammar = (function () {
|
|||
}
|
||||
}
|
||||
lineText = lineText + '\n';
|
||||
var lineLength = lineText.length;
|
||||
var onigLineText = rule_1.createOnigString(lineText);
|
||||
var lineLength = rule_1.getString(onigLineText).length;
|
||||
var lineTokens = new LineTokens();
|
||||
_tokenizeString(this, lineText, isFirstLine, 0, prevState, lineTokens);
|
||||
_tokenizeString(this, onigLineText, isFirstLine, 0, prevState, lineTokens);
|
||||
var _produced = lineTokens.getResult(prevState, lineLength);
|
||||
return {
|
||||
tokens: _produced,
|
||||
|
@ -1097,12 +1129,12 @@ function handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, captu
|
|||
if (captureRule.retokenizeCapturedWithRuleId) {
|
||||
// the capture requires additional matching
|
||||
var stackClone = stack.map(function (el) { return el.clone(); });
|
||||
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(lineText, captureIndices), captureRule.getContentName(lineText, captureIndices)));
|
||||
_tokenizeString(grammar, lineText.substring(0, captureIndex.end), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
|
||||
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(rule_1.getString(lineText), captureIndices), captureRule.getContentName(rule_1.getString(lineText), captureIndices)));
|
||||
_tokenizeString(grammar, rule_1.createOnigString(rule_1.getString(lineText).substring(0, captureIndex.end)), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
|
||||
continue;
|
||||
}
|
||||
// push
|
||||
localStack.push(new LocalStackElement(captureRule.getName(lineText, captureIndices), captureIndex.end));
|
||||
localStack.push(new LocalStackElement(captureRule.getName(rule_1.getString(lineText), captureIndices), captureIndex.end));
|
||||
}
|
||||
while (localStack.length > 0) {
|
||||
// pop!
|
||||
|
@ -1185,7 +1217,7 @@ function matchRuleOrInjections(grammar, lineText, isFirstLine, linePos, stack, a
|
|||
return matchResult;
|
||||
}
|
||||
function _tokenizeString(grammar, lineText, isFirstLine, linePos, stack, lineTokens) {
|
||||
var lineLength = lineText.length;
|
||||
var lineLength = rule_1.getString(lineText).length;
|
||||
var anchorPosition = -1;
|
||||
while (linePos < lineLength) {
|
||||
scanNext(); // potentially modifies linePos && anchorPosition
|
||||
|
@ -1224,15 +1256,15 @@ function _tokenizeString(grammar, lineText, isFirstLine, linePos, stack, lineTok
|
|||
var _rule = grammar.getRule(matchedRuleId);
|
||||
lineTokens.produce(stack, captureIndices[0].start);
|
||||
// push it on the stack rule
|
||||
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(lineText, captureIndices), null));
|
||||
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(rule_1.getString(lineText), captureIndices), null));
|
||||
if (_rule instanceof rule_1.BeginEndRule) {
|
||||
var pushedRule = _rule;
|
||||
handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, pushedRule.beginCaptures, captureIndices);
|
||||
lineTokens.produce(stack, captureIndices[0].end);
|
||||
anchorPosition = captureIndices[0].end;
|
||||
stack[stack.length - 1].contentName = pushedRule.getContentName(lineText, captureIndices);
|
||||
stack[stack.length - 1].contentName = pushedRule.getContentName(rule_1.getString(lineText), captureIndices);
|
||||
if (pushedRule.endHasBackReferences) {
|
||||
stack[stack.length - 1].endRule = pushedRule.getEndWithResolvedBackReferences(lineText, captureIndices);
|
||||
stack[stack.length - 1].endRule = pushedRule.getEndWithResolvedBackReferences(rule_1.getString(lineText), captureIndices);
|
||||
}
|
||||
if (!hasAdvanced && stackElement.ruleId === stack[stack.length - 1].ruleId) {
|
||||
// Grammar pushed the same rule without advancing
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
|
||||
import {clone} from './utils';
|
||||
import {IRawGrammar, IRawRepository, IRawRule} from './types';
|
||||
import {IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, MatchRule, ICompiledRule} from './rule';
|
||||
import {IOnigCaptureIndex, IOnigNextMatchResult} from 'oniguruma';
|
||||
import {IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, MatchRule, ICompiledRule, createOnigString, getString} from './rule';
|
||||
import {IOnigCaptureIndex, IOnigNextMatchResult, OnigString} from 'oniguruma';
|
||||
import {createMatcher, Matcher} from './matcher';
|
||||
|
||||
export function createGrammar(grammar:IRawGrammar, grammarRepository:IGrammarRepository): IGrammar {
|
||||
|
@ -223,9 +223,10 @@ class Grammar implements IGrammar, IRuleFactoryHelper {
|
|||
}
|
||||
|
||||
lineText = lineText + '\n';
|
||||
let lineLength = lineText.length;
|
||||
let onigLineText = createOnigString(lineText);
|
||||
let lineLength = getString(onigLineText).length;
|
||||
let lineTokens = new LineTokens();
|
||||
_tokenizeString(this, lineText, isFirstLine, 0, prevState, lineTokens);
|
||||
_tokenizeString(this, onigLineText, isFirstLine, 0, prevState, lineTokens);
|
||||
|
||||
let _produced = lineTokens.getResult(prevState, lineLength);
|
||||
|
||||
|
@ -248,7 +249,7 @@ function initGrammar(grammar:IRawGrammar, base:IRawRule): IRawGrammar {
|
|||
return grammar;
|
||||
}
|
||||
|
||||
function handleCaptures(grammar: Grammar, lineText: string, isFirstLine: boolean, stack: StackElement[], lineTokens: LineTokens, captures: CaptureRule[], captureIndices: IOnigCaptureIndex[]): void {
|
||||
function handleCaptures(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, stack: StackElement[], lineTokens: LineTokens, captures: CaptureRule[], captureIndices: IOnigCaptureIndex[]): void {
|
||||
if (captures.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -291,13 +292,18 @@ function handleCaptures(grammar: Grammar, lineText: string, isFirstLine: boolean
|
|||
if (captureRule.retokenizeCapturedWithRuleId) {
|
||||
// the capture requires additional matching
|
||||
let stackClone = stack.map((el) => el.clone());
|
||||
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(lineText, captureIndices), captureRule.getContentName(lineText, captureIndices)))
|
||||
_tokenizeString(grammar, lineText.substring(0, captureIndex.end), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
|
||||
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(getString(lineText), captureIndices), captureRule.getContentName(getString(lineText), captureIndices)))
|
||||
_tokenizeString(grammar,
|
||||
createOnigString(
|
||||
getString(lineText).substring(0, captureIndex.end)
|
||||
),
|
||||
(isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// push
|
||||
localStack.push(new LocalStackElement(captureRule.getName(lineText, captureIndices), captureIndex.end));
|
||||
localStack.push(new LocalStackElement(captureRule.getName(getString(lineText), captureIndices), captureIndex.end));
|
||||
}
|
||||
|
||||
while (localStack.length > 0) {
|
||||
|
@ -313,7 +319,7 @@ interface IMatchInjectionsResult {
|
|||
matchedRuleId: number;
|
||||
}
|
||||
|
||||
function matchInjections(injections:Injection[], grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchInjectionsResult {
|
||||
function matchInjections(injections:Injection[], grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchInjectionsResult {
|
||||
// The lower the better
|
||||
let bestMatchRating = Number.MAX_VALUE;
|
||||
let bestMatchCaptureIndices : IOnigCaptureIndex[] = null;
|
||||
|
@ -362,7 +368,7 @@ interface IMatchResult {
|
|||
matchedRuleId: number;
|
||||
}
|
||||
|
||||
function matchRule(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
|
||||
function matchRule(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
|
||||
let stackElement = stack[stack.length - 1];
|
||||
let ruleScanner = grammar.getRule(stackElement.ruleId).compile(grammar, stackElement.endRule, isFirstLine, linePos === anchorPosition);
|
||||
let r = ruleScanner.scanner._findNextMatchSync(lineText, linePos);
|
||||
|
@ -376,7 +382,7 @@ function matchRule(grammar: Grammar, lineText: string, isFirstLine: boolean, lin
|
|||
return null;
|
||||
}
|
||||
|
||||
function matchRuleOrInjections(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
|
||||
function matchRuleOrInjections(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
|
||||
// Look for normal grammar rule
|
||||
let matchResult = matchRule(grammar, lineText, isFirstLine, linePos, stack, anchorPosition);
|
||||
|
||||
|
@ -410,8 +416,8 @@ function matchRuleOrInjections(grammar: Grammar, lineText: string, isFirstLine:
|
|||
return matchResult;
|
||||
}
|
||||
|
||||
function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], lineTokens: LineTokens): void {
|
||||
const lineLength = lineText.length;
|
||||
function _tokenizeString(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], lineTokens: LineTokens): void {
|
||||
const lineLength = getString(lineText).length;
|
||||
|
||||
let anchorPosition = -1;
|
||||
|
||||
|
@ -462,7 +468,7 @@ function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolea
|
|||
lineTokens.produce(stack, captureIndices[0].start);
|
||||
|
||||
// push it on the stack rule
|
||||
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(lineText, captureIndices), null));
|
||||
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(getString(lineText), captureIndices), null));
|
||||
|
||||
if (_rule instanceof BeginEndRule) {
|
||||
let pushedRule = <BeginEndRule>_rule;
|
||||
|
@ -470,10 +476,10 @@ function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolea
|
|||
handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, pushedRule.beginCaptures, captureIndices);
|
||||
lineTokens.produce(stack, captureIndices[0].end);
|
||||
anchorPosition = captureIndices[0].end;
|
||||
stack[stack.length-1].contentName = pushedRule.getContentName(lineText, captureIndices);
|
||||
stack[stack.length-1].contentName = pushedRule.getContentName(getString(lineText), captureIndices);
|
||||
|
||||
if (pushedRule.endHasBackReferences) {
|
||||
stack[stack.length-1].endRule = pushedRule.getEndWithResolvedBackReferences(lineText, captureIndices);
|
||||
stack[stack.length-1].endRule = pushedRule.getEndWithResolvedBackReferences(getString(lineText), captureIndices);
|
||||
}
|
||||
|
||||
if (!hasAdvanced && stackElement.ruleId === stack[stack.length - 1].ruleId) {
|
||||
|
|
38
src/rule.ts
38
src/rule.ts
|
@ -5,7 +5,7 @@
|
|||
|
||||
import {RegexSource, mergeObjects} from './utils';
|
||||
import {IRawGrammar, IRawRepository, IRawRule, IRawCaptures} from './types';
|
||||
import {OnigScanner, IOnigCaptureIndex} from 'oniguruma';
|
||||
import {OnigString, OnigScanner, IOnigCaptureIndex} from 'oniguruma';
|
||||
|
||||
const HAS_BACK_REFERENCES = /\\(\d+)/;
|
||||
const BACK_REFERENCING_END = /\\(\d+)/g;
|
||||
|
@ -261,16 +261,46 @@ interface IRegExpSourceListAnchorCache {
|
|||
A1_G1: ICompiledRule;
|
||||
}
|
||||
|
||||
let createOnigScanner = (function() {
|
||||
var USE_NEW_ONE = true;
|
||||
|
||||
let getOnigModule = (function() {
|
||||
var onigurumaModule: any = null;
|
||||
return function createOnigScanner(sources:string[]): OnigScanner {
|
||||
return function() {
|
||||
if (!onigurumaModule) {
|
||||
if (USE_NEW_ONE) {
|
||||
onigurumaModule = require('alexandrudima-oniguruma');
|
||||
} else {
|
||||
onigurumaModule = require('oniguruma');
|
||||
}
|
||||
return new onigurumaModule.OnigScanner(sources);
|
||||
}
|
||||
return onigurumaModule;
|
||||
}
|
||||
})();
|
||||
|
||||
/**
 * Creates a scanner over the given regular expression sources.
 * The OnigScanner constructor is taken from the module object returned by
 * getOnigModule().
 *
 * @param sources the regular expression source strings handed to the scanner
 * @returns a new OnigScanner instance
 */
function createOnigScanner(sources:string[]): OnigScanner {
|
||||
let onigurumaModule = getOnigModule();
|
||||
return new onigurumaModule.OnigScanner(sources);
|
||||
}
|
||||
|
||||
/**
 * Prepares a piece of text for scanning.
 *
 * When USE_NEW_ONE is truthy, the text is wrapped in an OnigString obtained
 * from getOnigModule(), and the original string is attached to the wrapper
 * as `$str`. Otherwise the raw string is returned, cast to the OnigString
 * type.
 *
 * @param sources the text to wrap (a single string, despite the plural name)
 * @returns the OnigString wrapper, or `sources` itself when USE_NEW_ONE is falsy
 */
export function createOnigString(sources:string): OnigString {
|
||||
if (USE_NEW_ONE) {
|
||||
let onigurumaModule = getOnigModule();
|
||||
var r = new onigurumaModule.OnigString(sources);
|
||||
// Keep the original string on the wrapper; getString() reads it back from `$str`.
(<any>r).$str = sources;
|
||||
return r;
|
||||
} else {
|
||||
// No wrapping in this mode: the raw string stands in for an OnigString.
return <any>sources;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the plain string behind a value produced by createOnigString().
 *
 * When USE_NEW_ONE is truthy this reads the `$str` property that
 * createOnigString() attaches; otherwise the value is itself the string and
 * is returned as-is.
 *
 * @param str a value previously returned by createOnigString()
 * @returns the underlying string
 */
export function getString(str:OnigString): string {
|
||||
if (USE_NEW_ONE) {
|
||||
// `$str` is not part of the declared OnigString type, hence the cast.
return (<any>str).$str;
|
||||
} else {
|
||||
return (<any>str);
|
||||
}
|
||||
}
|
||||
|
||||
export class RegExpSourceList {
|
||||
|
||||
private _items: RegExpSource[];
|
||||
|
|
|
@ -15,6 +15,11 @@ declare module "oniguruma" {
|
|||
export class OnigScanner {
|
||||
constructor(regexps:string[]);
|
||||
_findNextMatchSync(lin:string, pos:number): IOnigNextMatchResult;
|
||||
_findNextMatchSync(lin:OnigString, pos:number): IOnigNextMatchResult;
|
||||
}
|
||||
|
||||
/**
 * A string wrapped for scanning. Instances are accepted by
 * OnigScanner._findNextMatchSync in place of a raw string.
 */
export class OnigString {
|
||||
/** @param str the text to wrap */
constructor(str:string);
|
||||
}
|
||||
|
||||
}
|
Загрузка…
Ссылка в новой задаче