take goroutines out of regexp so that regexps can be created

during initialization.

R=rsc
CC=go-dev
http://go/go-review/1016023
This commit is contained in:
Rob Pike 2009-11-02 10:08:22 -08:00
Родитель e6f85af2bb
Коммит acc54b6c70
1 изменённых файлов: 77 добавлений и 53 удалений

Просмотреть файл

@ -27,7 +27,6 @@ import (
"container/vector";
"io";
"os";
"runtime";
"utf8";
)
@ -72,8 +71,6 @@ func (c *common) setIndex(i int) { c._index = i }
// The public interface is entirely through methods.
type Regexp struct {
expr string; // the original expression
ch chan<- *Regexp; // reply channel when we're done
error os.Error; // compile- or run-time error; nil if OK
inst *vector.Vector;
start instr;
nbra int; // number of brackets in expression, for subexpressions
@ -244,13 +241,6 @@ type _Nop struct {
// kind returns the instruction-kind constant _NOP.
func (nop *_Nop) kind() int { return _NOP }
// print emits the text "nop" via print.
func (nop *_Nop) print() { print("nop") }
// setError reports err and exits the compiling/executing goroutine.
// It stores err in re.error, sends re on the reply channel re.ch, and then
// calls runtime.Goexit, so control does not return to the caller.
func (re *Regexp) setError(err os.Error) {
re.error = err;
re.ch <- re;	// deliver the Regexp, now carrying the error, on its reply channel
runtime.Goexit();	// terminate the current goroutine
}
func (re *Regexp) add(i instr) instr {
i.setIndex(re.inst.Len());
re.inst.Push(i);
@ -259,6 +249,7 @@ func (re *Regexp) add(i instr) instr {
type parser struct {
re *Regexp;
error os.Error;
nlpar int; // number of unclosed lpars
pos int;
ch int;
@ -288,8 +279,6 @@ func newParser(re *Regexp) *parser {
return p;
}
var iNULL instr
func special(c int) bool {
s := `\.+*?()|[]^$`;
for i := 0; i < len(s); i++ {
@ -321,7 +310,8 @@ func (p *parser) charClass() instr {
switch c := p.c(); c {
case ']', endOfFile:
if left >= 0 {
p.re.setError(ErrBadRange);
p.error = ErrBadRange;
return nil;
}
// Is it [^\n]?
if cc.negate && cc.ranges.Len() == 2 &&
@ -333,18 +323,21 @@ func (p *parser) charClass() instr {
p.re.add(cc);
return cc;
case '-': // do this before backslash processing
p.re.setError(ErrBadRange);
p.error = ErrBadRange;
return nil;
case '\\':
c = p.nextc();
switch {
case c == endOfFile:
p.re.setError(ErrExtraneousBackslash);
p.error = ErrExtraneousBackslash;
return nil;
case c == 'n':
c = '\n';
case specialcclass(c):
// c is as delivered
default:
p.re.setError(ErrBadBackslash);
p.error = ErrBadBackslash;
return nil;
}
fallthrough;
default:
@ -361,26 +354,37 @@ func (p *parser) charClass() instr {
cc.addRange(left, c);
left = -1;
default:
p.re.setError(ErrBadRange);
p.error = ErrBadRange;
return nil;
}
}
}
return iNULL
return nil
}
func (p *parser) term() (start, end instr) {
// term() is the leaf of the recursion, so it's sufficient to pick off the
// error state here for early exit.
// The other functions (closure(), concatenation() etc.) assume
// it's safe to recur to here.
if p.error != nil {
return
}
switch c := p.c(); c {
case '|', endOfFile:
return iNULL, iNULL;
return nil, nil;
case '*', '+':
p.re.setError(ErrBareClosure);
p.error = ErrBareClosure;
return;
case ')':
if p.nlpar == 0 {
p.re.setError(ErrUnmatchedRpar);
p.error = ErrUnmatchedRpar;
return;
}
return iNULL, iNULL;
return nil, nil;
case ']':
p.re.setError(ErrUnmatchedRbkt);
p.error = ErrUnmatchedRbkt;
return;
case '^':
p.nextc();
start = p.re.add(new(_Bot));
@ -396,8 +400,12 @@ func (p *parser) term() (start, end instr) {
case '[':
p.nextc();
start = p.charClass();
if p.error != nil {
return;
}
if p.c() != ']' {
p.re.setError(ErrUnmatchedLbkt);
p.error = ErrUnmatchedLbkt;
return;
}
p.nextc();
return start, start;
@ -408,7 +416,8 @@ func (p *parser) term() (start, end instr) {
nbra := p.re.nbra;
start, end = p.regexp();
if p.c() != ')' {
p.re.setError(ErrUnmatchedLpar);
p.error = ErrUnmatchedLpar;
return;
}
p.nlpar--;
p.nextc();
@ -418,9 +427,10 @@ func (p *parser) term() (start, end instr) {
p.re.add(ebra);
bra.n = nbra;
ebra.n = nbra;
if start == iNULL {
if end == iNULL {
p.re.setError(ErrInternal)
if start == nil {
if end == nil {
p.error = ErrInternal;
return;
}
start = ebra
} else {
@ -432,13 +442,15 @@ func (p *parser) term() (start, end instr) {
c = p.nextc();
switch {
case c == endOfFile:
p.re.setError(ErrExtraneousBackslash);
p.error = ErrExtraneousBackslash;
return;
case c == 'n':
c = '\n';
case special(c):
// c is as delivered
default:
p.re.setError(ErrBadBackslash);
p.error = ErrBadBackslash;
return;
}
fallthrough;
default:
@ -452,7 +464,7 @@ func (p *parser) term() (start, end instr) {
func (p *parser) closure() (start, end instr) {
start, end = p.term();
if start == iNULL {
if start == nil || p.error != nil {
return
}
switch p.c() {
@ -487,23 +499,25 @@ func (p *parser) closure() (start, end instr) {
}
switch p.nextc() {
case '*', '+', '?':
p.re.setError(ErrBadClosure);
p.error = ErrBadClosure;
}
return
}
func (p *parser) concatenation() (start, end instr) {
start, end = iNULL, iNULL;
for {
nstart, nend := p.closure();
if p.error != nil {
return
}
switch {
case nstart == iNULL: // end of this concatenation
if start == iNULL { // this is the empty string
case nstart == nil: // end of this concatenation
if start == nil { // this is the empty string
nop := p.re.add(new(_Nop));
return nop, nop;
}
return;
case start == iNULL: // this is first element of concatenation
case start == nil: // this is first element of concatenation
start, end = nstart, nend;
default:
end.setNext(nstart);
@ -515,6 +529,9 @@ func (p *parser) concatenation() (start, end instr) {
func (p *parser) regexp() (start, end instr) {
start, end = p.concatenation();
if p.error != nil {
return
}
for {
switch p.c() {
default:
@ -522,6 +539,9 @@ func (p *parser) regexp() (start, end instr) {
case '|':
p.nextc();
nstart, nend := p.concatenation();
if p.error != nil {
return
}
alt := new(_Alt);
p.re.add(alt);
alt.left = start;
@ -569,11 +589,14 @@ func (re *Regexp) dump() {
}
}
func (re *Regexp) doParse() {
func (re *Regexp) doParse() os.Error{
p := newParser(re);
start := new(_Start);
re.add(start);
s, e := p.regexp();
if p.error != nil {
return p.error;
}
start.setNext(s);
re.start = start;
e.setNext(re.add(new(_End)));
@ -584,31 +607,32 @@ func (re *Regexp) doParse() {
}
re.eliminateNops();
if debug {
re.dump();
println();
}
}
func compiler(str string, ch chan *Regexp) {
re := new(Regexp);
re.expr = str;
re.inst = vector.New(0);
re.ch = ch;
re.doParse();
ch <- re;
return p.error;
}
// Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text.
func Compile(str string) (regexp *Regexp, error os.Error) {
// Compile in a separate goroutine and wait for the result.
ch := make(chan *Regexp);
go compiler(str, ch);
re := <-ch;
return re, re.error
regexp = new(Regexp);
regexp.expr = str;
regexp.inst = vector.New(0);
error = regexp.doParse();
return;
}
// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
// The panic message contains the expression text and the error returned by Compile.
func MustCompile(str string) *Regexp {
regexp, error := Compile(str);
if error != nil {
// Include the offending pattern so the failing initializer can be identified.
panicln(`regexp: compiling "`, str, `": `, error);
}
return regexp;
}
type state struct {