take goroutines out of regexp so they can be created

during initialization.

R=rsc
CC=go-dev
http://go/go-review/1016023
This commit is contained in:
Rob Pike 2009-11-02 10:08:22 -08:00
Родитель e6f85af2bb
Коммит acc54b6c70
1 изменённых файлов: 77 добавлений и 53 удалений

Просмотреть файл

@ -27,7 +27,6 @@ import (
"container/vector"; "container/vector";
"io"; "io";
"os"; "os";
"runtime";
"utf8"; "utf8";
) )
@ -72,8 +71,6 @@ func (c *common) setIndex(i int) { c._index = i }
// The public interface is entirely through methods. // The public interface is entirely through methods.
type Regexp struct { type Regexp struct {
expr string; // the original expression expr string; // the original expression
ch chan<- *Regexp; // reply channel when we're done
error os.Error; // compile- or run-time error; nil if OK
inst *vector.Vector; inst *vector.Vector;
start instr; start instr;
nbra int; // number of brackets in expression, for subexpressions nbra int; // number of brackets in expression, for subexpressions
@ -244,13 +241,6 @@ type _Nop struct {
func (nop *_Nop) kind() int { return _NOP } func (nop *_Nop) kind() int { return _NOP }
func (nop *_Nop) print() { print("nop") } func (nop *_Nop) print() { print("nop") }
// report error and exit compiling/executing goroutine
func (re *Regexp) setError(err os.Error) {
re.error = err;
re.ch <- re;
runtime.Goexit();
}
func (re *Regexp) add(i instr) instr { func (re *Regexp) add(i instr) instr {
i.setIndex(re.inst.Len()); i.setIndex(re.inst.Len());
re.inst.Push(i); re.inst.Push(i);
@ -259,6 +249,7 @@ func (re *Regexp) add(i instr) instr {
type parser struct { type parser struct {
re *Regexp; re *Regexp;
error os.Error;
nlpar int; // number of unclosed lpars nlpar int; // number of unclosed lpars
pos int; pos int;
ch int; ch int;
@ -288,8 +279,6 @@ func newParser(re *Regexp) *parser {
return p; return p;
} }
var iNULL instr
func special(c int) bool { func special(c int) bool {
s := `\.+*?()|[]^$`; s := `\.+*?()|[]^$`;
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
@ -321,7 +310,8 @@ func (p *parser) charClass() instr {
switch c := p.c(); c { switch c := p.c(); c {
case ']', endOfFile: case ']', endOfFile:
if left >= 0 { if left >= 0 {
p.re.setError(ErrBadRange); p.error = ErrBadRange;
return nil;
} }
// Is it [^\n]? // Is it [^\n]?
if cc.negate && cc.ranges.Len() == 2 && if cc.negate && cc.ranges.Len() == 2 &&
@ -333,18 +323,21 @@ func (p *parser) charClass() instr {
p.re.add(cc); p.re.add(cc);
return cc; return cc;
case '-': // do this before backslash processing case '-': // do this before backslash processing
p.re.setError(ErrBadRange); p.error = ErrBadRange;
return nil;
case '\\': case '\\':
c = p.nextc(); c = p.nextc();
switch { switch {
case c == endOfFile: case c == endOfFile:
p.re.setError(ErrExtraneousBackslash); p.error = ErrExtraneousBackslash;
return nil;
case c == 'n': case c == 'n':
c = '\n'; c = '\n';
case specialcclass(c): case specialcclass(c):
// c is as delivered // c is as delivered
default: default:
p.re.setError(ErrBadBackslash); p.error = ErrBadBackslash;
return nil;
} }
fallthrough; fallthrough;
default: default:
@ -361,26 +354,37 @@ func (p *parser) charClass() instr {
cc.addRange(left, c); cc.addRange(left, c);
left = -1; left = -1;
default: default:
p.re.setError(ErrBadRange); p.error = ErrBadRange;
return nil;
} }
} }
} }
return iNULL return nil
} }
func (p *parser) term() (start, end instr) { func (p *parser) term() (start, end instr) {
// term() is the leaf of the recursion, so it's sufficient to pick off the
// error state here for early exit.
// The other functions (closure(), concatenation() etc.) assume
// it's safe to recur to here.
if p.error != nil {
return
}
switch c := p.c(); c { switch c := p.c(); c {
case '|', endOfFile: case '|', endOfFile:
return iNULL, iNULL; return nil, nil;
case '*', '+': case '*', '+':
p.re.setError(ErrBareClosure); p.error = ErrBareClosure;
return;
case ')': case ')':
if p.nlpar == 0 { if p.nlpar == 0 {
p.re.setError(ErrUnmatchedRpar); p.error = ErrUnmatchedRpar;
return;
} }
return iNULL, iNULL; return nil, nil;
case ']': case ']':
p.re.setError(ErrUnmatchedRbkt); p.error = ErrUnmatchedRbkt;
return;
case '^': case '^':
p.nextc(); p.nextc();
start = p.re.add(new(_Bot)); start = p.re.add(new(_Bot));
@ -396,8 +400,12 @@ func (p *parser) term() (start, end instr) {
case '[': case '[':
p.nextc(); p.nextc();
start = p.charClass(); start = p.charClass();
if p.error != nil {
return;
}
if p.c() != ']' { if p.c() != ']' {
p.re.setError(ErrUnmatchedLbkt); p.error = ErrUnmatchedLbkt;
return;
} }
p.nextc(); p.nextc();
return start, start; return start, start;
@ -408,7 +416,8 @@ func (p *parser) term() (start, end instr) {
nbra := p.re.nbra; nbra := p.re.nbra;
start, end = p.regexp(); start, end = p.regexp();
if p.c() != ')' { if p.c() != ')' {
p.re.setError(ErrUnmatchedLpar); p.error = ErrUnmatchedLpar;
return;
} }
p.nlpar--; p.nlpar--;
p.nextc(); p.nextc();
@ -418,9 +427,10 @@ func (p *parser) term() (start, end instr) {
p.re.add(ebra); p.re.add(ebra);
bra.n = nbra; bra.n = nbra;
ebra.n = nbra; ebra.n = nbra;
if start == iNULL { if start == nil {
if end == iNULL { if end == nil {
p.re.setError(ErrInternal) p.error = ErrInternal;
return;
} }
start = ebra start = ebra
} else { } else {
@ -432,13 +442,15 @@ func (p *parser) term() (start, end instr) {
c = p.nextc(); c = p.nextc();
switch { switch {
case c == endOfFile: case c == endOfFile:
p.re.setError(ErrExtraneousBackslash); p.error = ErrExtraneousBackslash;
return;
case c == 'n': case c == 'n':
c = '\n'; c = '\n';
case special(c): case special(c):
// c is as delivered // c is as delivered
default: default:
p.re.setError(ErrBadBackslash); p.error = ErrBadBackslash;
return;
} }
fallthrough; fallthrough;
default: default:
@ -452,7 +464,7 @@ func (p *parser) term() (start, end instr) {
func (p *parser) closure() (start, end instr) { func (p *parser) closure() (start, end instr) {
start, end = p.term(); start, end = p.term();
if start == iNULL { if start == nil || p.error != nil {
return return
} }
switch p.c() { switch p.c() {
@ -487,23 +499,25 @@ func (p *parser) closure() (start, end instr) {
} }
switch p.nextc() { switch p.nextc() {
case '*', '+', '?': case '*', '+', '?':
p.re.setError(ErrBadClosure); p.error = ErrBadClosure;
} }
return return
} }
func (p *parser) concatenation() (start, end instr) { func (p *parser) concatenation() (start, end instr) {
start, end = iNULL, iNULL;
for { for {
nstart, nend := p.closure(); nstart, nend := p.closure();
if p.error != nil {
return
}
switch { switch {
case nstart == iNULL: // end of this concatenation case nstart == nil: // end of this concatenation
if start == iNULL { // this is the empty string if start == nil { // this is the empty string
nop := p.re.add(new(_Nop)); nop := p.re.add(new(_Nop));
return nop, nop; return nop, nop;
} }
return; return;
case start == iNULL: // this is first element of concatenation case start == nil: // this is first element of concatenation
start, end = nstart, nend; start, end = nstart, nend;
default: default:
end.setNext(nstart); end.setNext(nstart);
@ -515,6 +529,9 @@ func (p *parser) concatenation() (start, end instr) {
func (p *parser) regexp() (start, end instr) { func (p *parser) regexp() (start, end instr) {
start, end = p.concatenation(); start, end = p.concatenation();
if p.error != nil {
return
}
for { for {
switch p.c() { switch p.c() {
default: default:
@ -522,6 +539,9 @@ func (p *parser) regexp() (start, end instr) {
case '|': case '|':
p.nextc(); p.nextc();
nstart, nend := p.concatenation(); nstart, nend := p.concatenation();
if p.error != nil {
return
}
alt := new(_Alt); alt := new(_Alt);
p.re.add(alt); p.re.add(alt);
alt.left = start; alt.left = start;
@ -569,11 +589,14 @@ func (re *Regexp) dump() {
} }
} }
func (re *Regexp) doParse() { func (re *Regexp) doParse() os.Error{
p := newParser(re); p := newParser(re);
start := new(_Start); start := new(_Start);
re.add(start); re.add(start);
s, e := p.regexp(); s, e := p.regexp();
if p.error != nil {
return p.error;
}
start.setNext(s); start.setNext(s);
re.start = start; re.start = start;
e.setNext(re.add(new(_End))); e.setNext(re.add(new(_End)));
@ -584,31 +607,32 @@ func (re *Regexp) doParse() {
} }
re.eliminateNops(); re.eliminateNops();
if debug { if debug {
re.dump(); re.dump();
println(); println();
} }
} return p.error;
func compiler(str string, ch chan *Regexp) {
re := new(Regexp);
re.expr = str;
re.inst = vector.New(0);
re.ch = ch;
re.doParse();
ch <- re;
} }
// Compile parses a regular expression and returns, if successful, a Regexp // Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text. // object that can be used to match against text.
func Compile(str string) (regexp *Regexp, error os.Error) { func Compile(str string) (regexp *Regexp, error os.Error) {
// Compile in a separate goroutine and wait for the result. regexp = new(Regexp);
ch := make(chan *Regexp); regexp.expr = str;
go compiler(str, ch); regexp.inst = vector.New(0);
re := <-ch; error = regexp.doParse();
return re, re.error return;
}
// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
func MustCompile(str string) *Regexp {
regexp, error := Compile(str);
if error != nil {
panicln(`regexp: compiling "`, str, `": `, error);
}
return regexp;
} }
type state struct { type state struct {