зеркало из https://github.com/CryptoPro/go.git
take goroutines out of regexp so that regexps can be created
during initialization. R=rsc CC=go-dev http://go/go-review/1016023
This commit is contained in:
Родитель
e6f85af2bb
Коммит
acc54b6c70
|
@ -27,7 +27,6 @@ import (
|
||||||
"container/vector";
|
"container/vector";
|
||||||
"io";
|
"io";
|
||||||
"os";
|
"os";
|
||||||
"runtime";
|
|
||||||
"utf8";
|
"utf8";
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -72,8 +71,6 @@ func (c *common) setIndex(i int) { c._index = i }
|
||||||
// The public interface is entirely through methods.
|
// The public interface is entirely through methods.
|
||||||
type Regexp struct {
|
type Regexp struct {
|
||||||
expr string; // the original expression
|
expr string; // the original expression
|
||||||
ch chan<- *Regexp; // reply channel when we're done
|
|
||||||
error os.Error; // compile- or run-time error; nil if OK
|
|
||||||
inst *vector.Vector;
|
inst *vector.Vector;
|
||||||
start instr;
|
start instr;
|
||||||
nbra int; // number of brackets in expression, for subexpressions
|
nbra int; // number of brackets in expression, for subexpressions
|
||||||
|
@ -244,13 +241,6 @@ type _Nop struct {
|
||||||
func (nop *_Nop) kind() int { return _NOP }
|
func (nop *_Nop) kind() int { return _NOP }
|
||||||
func (nop *_Nop) print() { print("nop") }
|
func (nop *_Nop) print() { print("nop") }
|
||||||
|
|
||||||
// report error and exit compiling/executing goroutine
|
|
||||||
func (re *Regexp) setError(err os.Error) {
|
|
||||||
re.error = err;
|
|
||||||
re.ch <- re;
|
|
||||||
runtime.Goexit();
|
|
||||||
}
|
|
||||||
|
|
||||||
func (re *Regexp) add(i instr) instr {
|
func (re *Regexp) add(i instr) instr {
|
||||||
i.setIndex(re.inst.Len());
|
i.setIndex(re.inst.Len());
|
||||||
re.inst.Push(i);
|
re.inst.Push(i);
|
||||||
|
@ -259,6 +249,7 @@ func (re *Regexp) add(i instr) instr {
|
||||||
|
|
||||||
type parser struct {
|
type parser struct {
|
||||||
re *Regexp;
|
re *Regexp;
|
||||||
|
error os.Error;
|
||||||
nlpar int; // number of unclosed lpars
|
nlpar int; // number of unclosed lpars
|
||||||
pos int;
|
pos int;
|
||||||
ch int;
|
ch int;
|
||||||
|
@ -288,8 +279,6 @@ func newParser(re *Regexp) *parser {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
var iNULL instr
|
|
||||||
|
|
||||||
func special(c int) bool {
|
func special(c int) bool {
|
||||||
s := `\.+*?()|[]^$`;
|
s := `\.+*?()|[]^$`;
|
||||||
for i := 0; i < len(s); i++ {
|
for i := 0; i < len(s); i++ {
|
||||||
|
@ -321,7 +310,8 @@ func (p *parser) charClass() instr {
|
||||||
switch c := p.c(); c {
|
switch c := p.c(); c {
|
||||||
case ']', endOfFile:
|
case ']', endOfFile:
|
||||||
if left >= 0 {
|
if left >= 0 {
|
||||||
p.re.setError(ErrBadRange);
|
p.error = ErrBadRange;
|
||||||
|
return nil;
|
||||||
}
|
}
|
||||||
// Is it [^\n]?
|
// Is it [^\n]?
|
||||||
if cc.negate && cc.ranges.Len() == 2 &&
|
if cc.negate && cc.ranges.Len() == 2 &&
|
||||||
|
@ -333,18 +323,21 @@ func (p *parser) charClass() instr {
|
||||||
p.re.add(cc);
|
p.re.add(cc);
|
||||||
return cc;
|
return cc;
|
||||||
case '-': // do this before backslash processing
|
case '-': // do this before backslash processing
|
||||||
p.re.setError(ErrBadRange);
|
p.error = ErrBadRange;
|
||||||
|
return nil;
|
||||||
case '\\':
|
case '\\':
|
||||||
c = p.nextc();
|
c = p.nextc();
|
||||||
switch {
|
switch {
|
||||||
case c == endOfFile:
|
case c == endOfFile:
|
||||||
p.re.setError(ErrExtraneousBackslash);
|
p.error = ErrExtraneousBackslash;
|
||||||
|
return nil;
|
||||||
case c == 'n':
|
case c == 'n':
|
||||||
c = '\n';
|
c = '\n';
|
||||||
case specialcclass(c):
|
case specialcclass(c):
|
||||||
// c is as delivered
|
// c is as delivered
|
||||||
default:
|
default:
|
||||||
p.re.setError(ErrBadBackslash);
|
p.error = ErrBadBackslash;
|
||||||
|
return nil;
|
||||||
}
|
}
|
||||||
fallthrough;
|
fallthrough;
|
||||||
default:
|
default:
|
||||||
|
@ -361,26 +354,37 @@ func (p *parser) charClass() instr {
|
||||||
cc.addRange(left, c);
|
cc.addRange(left, c);
|
||||||
left = -1;
|
left = -1;
|
||||||
default:
|
default:
|
||||||
p.re.setError(ErrBadRange);
|
p.error = ErrBadRange;
|
||||||
|
return nil;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return iNULL
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) term() (start, end instr) {
|
func (p *parser) term() (start, end instr) {
|
||||||
|
// term() is the leaf of the recursion, so it's sufficient to pick off the
|
||||||
|
// error state here for early exit.
|
||||||
|
// The other functions (closure(), concatenation() etc.) assume
|
||||||
|
// it's safe to recur to here.
|
||||||
|
if p.error != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
switch c := p.c(); c {
|
switch c := p.c(); c {
|
||||||
case '|', endOfFile:
|
case '|', endOfFile:
|
||||||
return iNULL, iNULL;
|
return nil, nil;
|
||||||
case '*', '+':
|
case '*', '+':
|
||||||
p.re.setError(ErrBareClosure);
|
p.error = ErrBareClosure;
|
||||||
|
return;
|
||||||
case ')':
|
case ')':
|
||||||
if p.nlpar == 0 {
|
if p.nlpar == 0 {
|
||||||
p.re.setError(ErrUnmatchedRpar);
|
p.error = ErrUnmatchedRpar;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
return iNULL, iNULL;
|
return nil, nil;
|
||||||
case ']':
|
case ']':
|
||||||
p.re.setError(ErrUnmatchedRbkt);
|
p.error = ErrUnmatchedRbkt;
|
||||||
|
return;
|
||||||
case '^':
|
case '^':
|
||||||
p.nextc();
|
p.nextc();
|
||||||
start = p.re.add(new(_Bot));
|
start = p.re.add(new(_Bot));
|
||||||
|
@ -396,8 +400,12 @@ func (p *parser) term() (start, end instr) {
|
||||||
case '[':
|
case '[':
|
||||||
p.nextc();
|
p.nextc();
|
||||||
start = p.charClass();
|
start = p.charClass();
|
||||||
|
if p.error != nil {
|
||||||
|
return;
|
||||||
|
}
|
||||||
if p.c() != ']' {
|
if p.c() != ']' {
|
||||||
p.re.setError(ErrUnmatchedLbkt);
|
p.error = ErrUnmatchedLbkt;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
p.nextc();
|
p.nextc();
|
||||||
return start, start;
|
return start, start;
|
||||||
|
@ -408,7 +416,8 @@ func (p *parser) term() (start, end instr) {
|
||||||
nbra := p.re.nbra;
|
nbra := p.re.nbra;
|
||||||
start, end = p.regexp();
|
start, end = p.regexp();
|
||||||
if p.c() != ')' {
|
if p.c() != ')' {
|
||||||
p.re.setError(ErrUnmatchedLpar);
|
p.error = ErrUnmatchedLpar;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
p.nlpar--;
|
p.nlpar--;
|
||||||
p.nextc();
|
p.nextc();
|
||||||
|
@ -418,9 +427,10 @@ func (p *parser) term() (start, end instr) {
|
||||||
p.re.add(ebra);
|
p.re.add(ebra);
|
||||||
bra.n = nbra;
|
bra.n = nbra;
|
||||||
ebra.n = nbra;
|
ebra.n = nbra;
|
||||||
if start == iNULL {
|
if start == nil {
|
||||||
if end == iNULL {
|
if end == nil {
|
||||||
p.re.setError(ErrInternal)
|
p.error = ErrInternal;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
start = ebra
|
start = ebra
|
||||||
} else {
|
} else {
|
||||||
|
@ -432,13 +442,15 @@ func (p *parser) term() (start, end instr) {
|
||||||
c = p.nextc();
|
c = p.nextc();
|
||||||
switch {
|
switch {
|
||||||
case c == endOfFile:
|
case c == endOfFile:
|
||||||
p.re.setError(ErrExtraneousBackslash);
|
p.error = ErrExtraneousBackslash;
|
||||||
|
return;
|
||||||
case c == 'n':
|
case c == 'n':
|
||||||
c = '\n';
|
c = '\n';
|
||||||
case special(c):
|
case special(c):
|
||||||
// c is as delivered
|
// c is as delivered
|
||||||
default:
|
default:
|
||||||
p.re.setError(ErrBadBackslash);
|
p.error = ErrBadBackslash;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
fallthrough;
|
fallthrough;
|
||||||
default:
|
default:
|
||||||
|
@ -452,7 +464,7 @@ func (p *parser) term() (start, end instr) {
|
||||||
|
|
||||||
func (p *parser) closure() (start, end instr) {
|
func (p *parser) closure() (start, end instr) {
|
||||||
start, end = p.term();
|
start, end = p.term();
|
||||||
if start == iNULL {
|
if start == nil || p.error != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
switch p.c() {
|
switch p.c() {
|
||||||
|
@ -487,23 +499,25 @@ func (p *parser) closure() (start, end instr) {
|
||||||
}
|
}
|
||||||
switch p.nextc() {
|
switch p.nextc() {
|
||||||
case '*', '+', '?':
|
case '*', '+', '?':
|
||||||
p.re.setError(ErrBadClosure);
|
p.error = ErrBadClosure;
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) concatenation() (start, end instr) {
|
func (p *parser) concatenation() (start, end instr) {
|
||||||
start, end = iNULL, iNULL;
|
|
||||||
for {
|
for {
|
||||||
nstart, nend := p.closure();
|
nstart, nend := p.closure();
|
||||||
|
if p.error != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
switch {
|
switch {
|
||||||
case nstart == iNULL: // end of this concatenation
|
case nstart == nil: // end of this concatenation
|
||||||
if start == iNULL { // this is the empty string
|
if start == nil { // this is the empty string
|
||||||
nop := p.re.add(new(_Nop));
|
nop := p.re.add(new(_Nop));
|
||||||
return nop, nop;
|
return nop, nop;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
case start == iNULL: // this is first element of concatenation
|
case start == nil: // this is first element of concatenation
|
||||||
start, end = nstart, nend;
|
start, end = nstart, nend;
|
||||||
default:
|
default:
|
||||||
end.setNext(nstart);
|
end.setNext(nstart);
|
||||||
|
@ -515,6 +529,9 @@ func (p *parser) concatenation() (start, end instr) {
|
||||||
|
|
||||||
func (p *parser) regexp() (start, end instr) {
|
func (p *parser) regexp() (start, end instr) {
|
||||||
start, end = p.concatenation();
|
start, end = p.concatenation();
|
||||||
|
if p.error != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
for {
|
for {
|
||||||
switch p.c() {
|
switch p.c() {
|
||||||
default:
|
default:
|
||||||
|
@ -522,6 +539,9 @@ func (p *parser) regexp() (start, end instr) {
|
||||||
case '|':
|
case '|':
|
||||||
p.nextc();
|
p.nextc();
|
||||||
nstart, nend := p.concatenation();
|
nstart, nend := p.concatenation();
|
||||||
|
if p.error != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
alt := new(_Alt);
|
alt := new(_Alt);
|
||||||
p.re.add(alt);
|
p.re.add(alt);
|
||||||
alt.left = start;
|
alt.left = start;
|
||||||
|
@ -569,11 +589,14 @@ func (re *Regexp) dump() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (re *Regexp) doParse() {
|
func (re *Regexp) doParse() os.Error{
|
||||||
p := newParser(re);
|
p := newParser(re);
|
||||||
start := new(_Start);
|
start := new(_Start);
|
||||||
re.add(start);
|
re.add(start);
|
||||||
s, e := p.regexp();
|
s, e := p.regexp();
|
||||||
|
if p.error != nil {
|
||||||
|
return p.error;
|
||||||
|
}
|
||||||
start.setNext(s);
|
start.setNext(s);
|
||||||
re.start = start;
|
re.start = start;
|
||||||
e.setNext(re.add(new(_End)));
|
e.setNext(re.add(new(_End)));
|
||||||
|
@ -584,31 +607,32 @@ func (re *Regexp) doParse() {
|
||||||
}
|
}
|
||||||
|
|
||||||
re.eliminateNops();
|
re.eliminateNops();
|
||||||
|
|
||||||
if debug {
|
if debug {
|
||||||
re.dump();
|
re.dump();
|
||||||
println();
|
println();
|
||||||
}
|
}
|
||||||
}
|
return p.error;
|
||||||
|
|
||||||
|
|
||||||
func compiler(str string, ch chan *Regexp) {
|
|
||||||
re := new(Regexp);
|
|
||||||
re.expr = str;
|
|
||||||
re.inst = vector.New(0);
|
|
||||||
re.ch = ch;
|
|
||||||
re.doParse();
|
|
||||||
ch <- re;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compile parses a regular expression and returns, if successful, a Regexp
|
// Compile parses a regular expression and returns, if successful, a Regexp
|
||||||
// object that can be used to match against text.
|
// object that can be used to match against text.
|
||||||
func Compile(str string) (regexp *Regexp, error os.Error) {
|
func Compile(str string) (regexp *Regexp, error os.Error) {
|
||||||
// Compile in a separate goroutine and wait for the result.
|
regexp = new(Regexp);
|
||||||
ch := make(chan *Regexp);
|
regexp.expr = str;
|
||||||
go compiler(str, ch);
|
regexp.inst = vector.New(0);
|
||||||
re := <-ch;
|
error = regexp.doParse();
|
||||||
return re, re.error
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// MustCompile is like Compile but panics if the expression cannot be parsed.
|
||||||
|
// It simplifies safe initialization of global variables holding compiled regular
|
||||||
|
// expressions.
|
||||||
|
func MustCompile(str string) *Regexp {
|
||||||
|
regexp, error := Compile(str);
|
||||||
|
if error != nil {
|
||||||
|
panicln(`regexp: compiling "`, str, `": `, error);
|
||||||
|
}
|
||||||
|
return regexp;
|
||||||
}
|
}
|
||||||
|
|
||||||
type state struct {
|
type state struct {
|
||||||
|
|
Загрузка…
Ссылка в новой задаче