Adopt oniguruma with OnigScanner

This commit is contained in:
Alex Dima 2015-12-06 22:06:48 +01:00
Родитель e08a39f73e
Коммит 6be0ea8e60
11 изменённых файлов: 120 добавлений и 40 удалений

Просмотреть файл

@@ -25,7 +25,9 @@ function tokenizeFile(filePath, grammar, message) {
tokenize(grammar, content.split(/\r\n|\r|\n/));
}
tokenizeFile(path.join(__dirname, 'large.js'), jsGrammar, 'Tokenizing jQuery v2.0.3')
tokenizeFile(path.join(__dirname, 'large.min.js'), jsGrammar, 'Tokenizing jQuery v2.0.3 minified')
tokenizeFile(path.join(__dirname, 'bootstrap.css'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1')
tokenizeFile(path.join(__dirname, 'bootstrap.min.css'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1 minified')
tokenizeFile(path.join(__dirname, 'main.08642f99.css.txt'), cssGrammar, 'Tokenizing Bootstrap with multi-byte')
tokenizeFile(path.join(__dirname, 'large.js.txt'), jsGrammar, 'Tokenizing jQuery v2.0.3')
tokenizeFile(path.join(__dirname, 'large.min.js.txt'), jsGrammar, 'Tokenizing jQuery v2.0.3 minified')
tokenizeFile(path.join(__dirname, 'bootstrap.css.txt'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1')
tokenizeFile(path.join(__dirname, 'bootstrap.min.css.txt'), cssGrammar, 'Tokenizing Bootstrap CSS v3.1.1 minified')

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Различия файлов скрыты, потому что одна или несколько строк слишком длинны

Просмотреть файл

@@ -1,6 +1,6 @@
{
"name": "vscode-textmate",
"version": "1.0.8",
"version": "1.0.9",
"description": "VSCode TextMate grammar helpers",
"author": {
"name": "Microsoft Corporation"
@@ -20,7 +20,7 @@
"benchmark": "node benchmark/benchmark.js"
},
"dependencies": {
"oniguruma": "^5.1.1",
"alexandrudima-oniguruma": "^6.0.0",
"sax": "^1.1.1"
},
"devDependencies": {

Просмотреть файл

@@ -558,15 +558,46 @@ var RegExpSource = (function () {
return RegExpSource;
})();
exports.RegExpSource = RegExpSource;
var createOnigScanner = (function () {
var USE_NEW_ONE = true;
var getOnigModule = (function () {
var onigurumaModule = null;
return function createOnigScanner(sources) {
return function () {
if (!onigurumaModule) {
if (USE_NEW_ONE) {
onigurumaModule = require('alexandrudima-oniguruma');
}
else {
onigurumaModule = require('oniguruma');
}
return new onigurumaModule.OnigScanner(sources);
}
return onigurumaModule;
};
})();
// Constructs an OnigScanner for the given regexp sources using whichever
// oniguruma module getOnigModule resolves (cached after the first require —
// see the getOnigModule IIFE above).
function createOnigScanner(sources) {
var onigurumaModule = getOnigModule();
return new onigurumaModule.OnigScanner(sources);
}
// Wraps a plain JS string for consumption by OnigScanner._findNextMatchSync.
// With the new oniguruma module (USE_NEW_ONE) an OnigString is built and the
// original text is stashed on it so getString can recover it; with the legacy
// module the raw string is passed through unchanged.
function createOnigString(sources) {
    if (!USE_NEW_ONE) {
        return sources;
    }
    var onigurumaModule = getOnigModule();
    var onigString = new onigurumaModule.OnigString(sources);
    // Keep the source text around; getString reads it back via $str.
    onigString.$str = sources;
    return onigString;
}
exports.createOnigString = createOnigString;
// Inverse of createOnigString: recovers the plain JS string. Under the new
// module the text was stashed on the OnigString as $str; under the legacy
// module the value already is the string.
function getString(str) {
    return USE_NEW_ONE ? str.$str : str;
}
exports.getString = getString;
var RegExpSourceList = (function () {
function RegExpSourceList() {
this._items = [];
@@ -1046,9 +1077,10 @@ var Grammar = (function () {
}
}
lineText = lineText + '\n';
var lineLength = lineText.length;
var onigLineText = rule_1.createOnigString(lineText);
var lineLength = rule_1.getString(onigLineText).length;
var lineTokens = new LineTokens();
_tokenizeString(this, lineText, isFirstLine, 0, prevState, lineTokens);
_tokenizeString(this, onigLineText, isFirstLine, 0, prevState, lineTokens);
var _produced = lineTokens.getResult(prevState, lineLength);
return {
tokens: _produced,
@@ -1097,12 +1129,12 @@ function handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, captu
if (captureRule.retokenizeCapturedWithRuleId) {
// the capture requires additional matching
var stackClone = stack.map(function (el) { return el.clone(); });
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(lineText, captureIndices), captureRule.getContentName(lineText, captureIndices)));
_tokenizeString(grammar, lineText.substring(0, captureIndex.end), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(rule_1.getString(lineText), captureIndices), captureRule.getContentName(rule_1.getString(lineText), captureIndices)));
_tokenizeString(grammar, rule_1.createOnigString(rule_1.getString(lineText).substring(0, captureIndex.end)), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
continue;
}
// push
localStack.push(new LocalStackElement(captureRule.getName(lineText, captureIndices), captureIndex.end));
localStack.push(new LocalStackElement(captureRule.getName(rule_1.getString(lineText), captureIndices), captureIndex.end));
}
while (localStack.length > 0) {
// pop!
@@ -1185,7 +1217,7 @@ function matchRuleOrInjections(grammar, lineText, isFirstLine, linePos, stack, a
return matchResult;
}
function _tokenizeString(grammar, lineText, isFirstLine, linePos, stack, lineTokens) {
var lineLength = lineText.length;
var lineLength = rule_1.getString(lineText).length;
var anchorPosition = -1;
while (linePos < lineLength) {
scanNext(); // potentially modifies linePos && anchorPosition
@@ -1224,15 +1256,15 @@ function _tokenizeString(grammar, lineText, isFirstLine, linePos, stack, lineTok
var _rule = grammar.getRule(matchedRuleId);
lineTokens.produce(stack, captureIndices[0].start);
// push it on the stack rule
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(lineText, captureIndices), null));
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(rule_1.getString(lineText), captureIndices), null));
if (_rule instanceof rule_1.BeginEndRule) {
var pushedRule = _rule;
handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, pushedRule.beginCaptures, captureIndices);
lineTokens.produce(stack, captureIndices[0].end);
anchorPosition = captureIndices[0].end;
stack[stack.length - 1].contentName = pushedRule.getContentName(lineText, captureIndices);
stack[stack.length - 1].contentName = pushedRule.getContentName(rule_1.getString(lineText), captureIndices);
if (pushedRule.endHasBackReferences) {
stack[stack.length - 1].endRule = pushedRule.getEndWithResolvedBackReferences(lineText, captureIndices);
stack[stack.length - 1].endRule = pushedRule.getEndWithResolvedBackReferences(rule_1.getString(lineText), captureIndices);
}
if (!hasAdvanced && stackElement.ruleId === stack[stack.length - 1].ruleId) {
// Grammar pushed the same rule without advancing

Просмотреть файл

@@ -5,8 +5,8 @@
import {clone} from './utils';
import {IRawGrammar, IRawRepository, IRawRule} from './types';
import {IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, MatchRule, ICompiledRule} from './rule';
import {IOnigCaptureIndex, IOnigNextMatchResult} from 'oniguruma';
import {IRuleFactoryHelper, RuleFactory, Rule, CaptureRule, BeginEndRule, MatchRule, ICompiledRule, createOnigString, getString} from './rule';
import {IOnigCaptureIndex, IOnigNextMatchResult, OnigString} from 'oniguruma';
import {createMatcher, Matcher} from './matcher';
export function createGrammar(grammar:IRawGrammar, grammarRepository:IGrammarRepository): IGrammar {
@@ -223,9 +223,10 @@ class Grammar implements IGrammar, IRuleFactoryHelper {
}
lineText = lineText + '\n';
let lineLength = lineText.length;
let onigLineText = createOnigString(lineText);
let lineLength = getString(onigLineText).length;
let lineTokens = new LineTokens();
_tokenizeString(this, lineText, isFirstLine, 0, prevState, lineTokens);
_tokenizeString(this, onigLineText, isFirstLine, 0, prevState, lineTokens);
let _produced = lineTokens.getResult(prevState, lineLength);
@@ -248,7 +249,7 @@ function initGrammar(grammar:IRawGrammar, base:IRawRule): IRawGrammar {
return grammar;
}
function handleCaptures(grammar: Grammar, lineText: string, isFirstLine: boolean, stack: StackElement[], lineTokens: LineTokens, captures: CaptureRule[], captureIndices: IOnigCaptureIndex[]): void {
function handleCaptures(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, stack: StackElement[], lineTokens: LineTokens, captures: CaptureRule[], captureIndices: IOnigCaptureIndex[]): void {
if (captures.length === 0) {
return;
}
@@ -291,13 +292,18 @@ function handleCaptures(grammar: Grammar, lineText: string, isFirstLine: boolean
if (captureRule.retokenizeCapturedWithRuleId) {
// the capture requires additional matching
let stackClone = stack.map((el) => el.clone());
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(lineText, captureIndices), captureRule.getContentName(lineText, captureIndices)))
_tokenizeString(grammar, lineText.substring(0, captureIndex.end), (isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens);
stackClone.push(new StackElement(captureRule.retokenizeCapturedWithRuleId, captureIndex.start, null, captureRule.getName(getString(lineText), captureIndices), captureRule.getContentName(getString(lineText), captureIndices)))
_tokenizeString(grammar,
createOnigString(
getString(lineText).substring(0, captureIndex.end)
),
(isFirstLine && captureIndex.start === 0), captureIndex.start, stackClone, lineTokens
);
continue;
}
// push
localStack.push(new LocalStackElement(captureRule.getName(lineText, captureIndices), captureIndex.end));
localStack.push(new LocalStackElement(captureRule.getName(getString(lineText), captureIndices), captureIndex.end));
}
while (localStack.length > 0) {
@@ -313,7 +319,7 @@ interface IMatchInjectionsResult {
matchedRuleId: number;
}
function matchInjections(injections:Injection[], grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchInjectionsResult {
function matchInjections(injections:Injection[], grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchInjectionsResult {
// The lower the better
let bestMatchRating = Number.MAX_VALUE;
let bestMatchCaptureIndices : IOnigCaptureIndex[] = null;
@@ -362,7 +368,7 @@ interface IMatchResult {
matchedRuleId: number;
}
function matchRule(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
function matchRule(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
let stackElement = stack[stack.length - 1];
let ruleScanner = grammar.getRule(stackElement.ruleId).compile(grammar, stackElement.endRule, isFirstLine, linePos === anchorPosition);
let r = ruleScanner.scanner._findNextMatchSync(lineText, linePos);
@@ -376,7 +382,7 @@ function matchRule(grammar: Grammar, lineText: string, isFirstLine: boolean, lin
return null;
}
function matchRuleOrInjections(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
function matchRuleOrInjections(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], anchorPosition:number): IMatchResult {
// Look for normal grammar rule
let matchResult = matchRule(grammar, lineText, isFirstLine, linePos, stack, anchorPosition);
@@ -410,8 +416,8 @@ function matchRuleOrInjections(grammar: Grammar, lineText: string, isFirstLine:
return matchResult;
}
function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolean, linePos: number, stack: StackElement[], lineTokens: LineTokens): void {
const lineLength = lineText.length;
function _tokenizeString(grammar: Grammar, lineText: OnigString, isFirstLine: boolean, linePos: number, stack: StackElement[], lineTokens: LineTokens): void {
const lineLength = getString(lineText).length;
let anchorPosition = -1;
@@ -462,7 +468,7 @@ function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolea
lineTokens.produce(stack, captureIndices[0].start);
// push it on the stack rule
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(lineText, captureIndices), null));
stack.push(new StackElement(matchedRuleId, linePos, null, _rule.getName(getString(lineText), captureIndices), null));
if (_rule instanceof BeginEndRule) {
let pushedRule = <BeginEndRule>_rule;
@@ -470,10 +476,10 @@ function _tokenizeString(grammar: Grammar, lineText: string, isFirstLine: boolea
handleCaptures(grammar, lineText, isFirstLine, stack, lineTokens, pushedRule.beginCaptures, captureIndices);
lineTokens.produce(stack, captureIndices[0].end);
anchorPosition = captureIndices[0].end;
stack[stack.length-1].contentName = pushedRule.getContentName(lineText, captureIndices);
stack[stack.length-1].contentName = pushedRule.getContentName(getString(lineText), captureIndices);
if (pushedRule.endHasBackReferences) {
stack[stack.length-1].endRule = pushedRule.getEndWithResolvedBackReferences(lineText, captureIndices);
stack[stack.length-1].endRule = pushedRule.getEndWithResolvedBackReferences(getString(lineText), captureIndices);
}
if (!hasAdvanced && stackElement.ruleId === stack[stack.length - 1].ruleId) {

Просмотреть файл

@@ -5,7 +5,7 @@
import {RegexSource, mergeObjects} from './utils';
import {IRawGrammar, IRawRepository, IRawRule, IRawCaptures} from './types';
import {OnigScanner, IOnigCaptureIndex} from 'oniguruma';
import {OnigString, OnigScanner, IOnigCaptureIndex} from 'oniguruma';
const HAS_BACK_REFERENCES = /\\(\d+)/;
const BACK_REFERENCING_END = /\\(\d+)/g;
@@ -261,16 +261,46 @@ interface IRegExpSourceListAnchorCache {
A1_G1: ICompiledRule;
}
let createOnigScanner = (function() {
var USE_NEW_ONE = true;
let getOnigModule = (function() {
var onigurumaModule: any = null;
return function createOnigScanner(sources:string[]): OnigScanner {
return function() {
if (!onigurumaModule) {
if (USE_NEW_ONE) {
onigurumaModule = require('alexandrudima-oniguruma');
} else {
onigurumaModule = require('oniguruma');
}
return new onigurumaModule.OnigScanner(sources);
}
return onigurumaModule;
}
})();
// Builds an OnigScanner over the given regexp sources using whichever
// oniguruma module getOnigModule resolved (cached after the first require).
function createOnigScanner(sources: string[]): OnigScanner {
	const mod = getOnigModule();
	return new mod.OnigScanner(sources);
}
// Converts a raw line of text into the value OnigScanner._findNextMatchSync
// expects: an OnigString (tagged with the original text via $str) when the
// new module is active, or the untouched string for the legacy module.
export function createOnigString(sources: string): OnigString {
	if (!USE_NEW_ONE) {
		return <any>sources;
	}
	const mod = getOnigModule();
	const result = new mod.OnigString(sources);
	// Stash the source text so getString can recover it without a native call.
	(<any>result).$str = sources;
	return result;
}
// Inverse of createOnigString: yields the plain JS string back ($str was
// attached by createOnigString under the new module; under the legacy module
// the value already is the string).
export function getString(str: OnigString): string {
	return USE_NEW_ONE ? (<any>str).$str : <any>str;
}
export class RegExpSourceList {
private _items: RegExpSource[];

5
src/typings/oniguruma.d.ts поставляемый
Просмотреть файл

@@ -15,6 +15,11 @@ declare module "oniguruma" {
// Ambient typing for the native scanner exported by the oniguruma module.
export class OnigScanner {
constructor(regexps:string[]);
// NOTE(review): this diff view shows both signatures of _findNextMatchSync —
// the old string-taking one and the OnigString-taking one this commit adds.
_findNextMatchSync(lin:string, pos:number): IOnigNextMatchResult;
_findNextMatchSync(lin:OnigString, pos:number): IOnigNextMatchResult;
}
// String wrapper type introduced by this commit; presumably lets the native
// side reuse the encoded string across scans — TODO confirm against the
// oniguruma module's documentation.
export class OnigString {
constructor(str:string);
}
}