From acc54b6c707e8571101f5b018bc281a239b5a19a Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Mon, 2 Nov 2009 10:08:22 -0800 Subject: [PATCH] take goroutines out of regexp so they can be created during initialization. R=rsc CC=go-dev http://go/go-review/1016023 --- src/pkg/regexp/regexp.go | 130 +++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 53 deletions(-) diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index f754418ecb..0b69ea01d2 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -27,7 +27,6 @@ import ( "container/vector"; "io"; "os"; - "runtime"; "utf8"; ) @@ -72,8 +71,6 @@ func (c *common) setIndex(i int) { c._index = i } // The public interface is entirely through methods. type Regexp struct { expr string; // the original expression - ch chan<- *Regexp; // reply channel when we're done - error os.Error; // compile- or run-time error; nil if OK inst *vector.Vector; start instr; nbra int; // number of brackets in expression, for subexpressions @@ -244,13 +241,6 @@ type _Nop struct { func (nop *_Nop) kind() int { return _NOP } func (nop *_Nop) print() { print("nop") } -// report error and exit compiling/executing goroutine -func (re *Regexp) setError(err os.Error) { - re.error = err; - re.ch <- re; - runtime.Goexit(); -} - func (re *Regexp) add(i instr) instr { i.setIndex(re.inst.Len()); re.inst.Push(i); @@ -259,6 +249,7 @@ func (re *Regexp) add(i instr) instr { type parser struct { re *Regexp; + error os.Error; nlpar int; // number of unclosed lpars pos int; ch int; @@ -288,8 +279,6 @@ func newParser(re *Regexp) *parser { return p; } -var iNULL instr - func special(c int) bool { s := `\.+*?()|[]^$`; for i := 0; i < len(s); i++ { @@ -321,7 +310,8 @@ func (p *parser) charClass() instr { switch c := p.c(); c { case ']', endOfFile: if left >= 0 { - p.re.setError(ErrBadRange); + p.error = ErrBadRange; + return nil; } // Is it [^\n]? if cc.negate && cc.ranges.Len() == 2 && @@ -333,18 +323,21 @@ func (p *parser) charClass() instr { p.re.add(cc); return cc; case '-': // do this before backslash processing - p.re.setError(ErrBadRange); + p.error = ErrBadRange; + return nil; case '\\': c = p.nextc(); switch { case c == endOfFile: - p.re.setError(ErrExtraneousBackslash); + p.error = ErrExtraneousBackslash; + return nil; case c == 'n': c = '\n'; case specialcclass(c): // c is as delivered default: - p.re.setError(ErrBadBackslash); + p.error = ErrBadBackslash; + return nil; } fallthrough; default: @@ -361,26 +354,37 @@ func (p *parser) charClass() instr { cc.addRange(left, c); left = -1; default: - p.re.setError(ErrBadRange); + p.error = ErrBadRange; + return nil; } } } - return iNULL + return nil } func (p *parser) term() (start, end instr) { + // term() is the leaf of the recursion, so it's sufficient to pick off the + // error state here for early exit. + // The other functions (closure(), concatenation() etc.) assume + // it's safe to recur to here. + if p.error != nil { + return + } switch c := p.c(); c { case '|', endOfFile: - return iNULL, iNULL; + return nil, nil; case '*', '+': - p.re.setError(ErrBareClosure); + p.error = ErrBareClosure; + return; case ')': if p.nlpar == 0 { - p.re.setError(ErrUnmatchedRpar); + p.error = ErrUnmatchedRpar; + return; } - return iNULL, iNULL; + return nil, nil; case ']': - p.re.setError(ErrUnmatchedRbkt); + p.error = ErrUnmatchedRbkt; + return; case '^': p.nextc(); start = p.re.add(new(_Bot)); @@ -396,8 +400,12 @@ func (p *parser) term() (start, end instr) { case '[': p.nextc(); start = p.charClass(); + if p.error != nil { + return; + } if p.c() != ']' { - p.re.setError(ErrUnmatchedLbkt); + p.error = ErrUnmatchedLbkt; + return; } p.nextc(); return start, start; @@ -408,7 +416,8 @@ func (p *parser) term() (start, end instr) { nbra := p.re.nbra; start, end = p.regexp(); if p.c() != ')' { - p.re.setError(ErrUnmatchedLpar); + p.error = ErrUnmatchedLpar; + return; } p.nlpar--; p.nextc(); @@ -418,9 +427,10 @@ func (p *parser) term() (start, end instr) { p.re.add(ebra); bra.n = nbra; ebra.n = nbra; - if start == iNULL { - if end == iNULL { - p.re.setError(ErrInternal) + if start == nil { + if end == nil { + p.error = ErrInternal; + return; } start = ebra } else { @@ -432,13 +442,15 @@ func (p *parser) term() (start, end instr) { c = p.nextc(); switch { case c == endOfFile: - p.re.setError(ErrExtraneousBackslash); + p.error = ErrExtraneousBackslash; + return; case c == 'n': c = '\n'; case special(c): // c is as delivered default: - p.re.setError(ErrBadBackslash); + p.error = ErrBadBackslash; + return; } fallthrough; default: @@ -452,7 +464,7 @@ func (p *parser) term() (start, end instr) { func (p *parser) closure() (start, end instr) { start, end = p.term(); - if start == iNULL { + if start == nil || p.error != nil { return } switch p.c() { @@ -487,23 +499,25 @@ func (p *parser) closure() (start, end instr) { } switch p.nextc() { case '*', '+', '?': - p.re.setError(ErrBadClosure); + p.error = ErrBadClosure; } return } func (p *parser) concatenation() (start, end instr) { - start, end = iNULL, iNULL; for { nstart, nend := p.closure(); + if p.error != nil { + return + } switch { - case nstart == iNULL: // end of this concatenation - if start == iNULL { // this is the empty string + case nstart == nil: // end of this concatenation + if start == nil { // this is the empty string nop := p.re.add(new(_Nop)); return nop, nop; } return; - case start == iNULL: // this is first element of concatenation + case start == nil: // this is first element of concatenation start, end = nstart, nend; default: end.setNext(nstart); @@ -515,6 +529,9 @@ func (p *parser) concatenation() (start, end instr) { func (p *parser) regexp() (start, end instr) { start, end = p.concatenation(); + if p.error != nil { + return + } for { switch p.c() { default: @@ -522,6 +539,9 @@ func (p *parser) regexp() (start, end instr) { case '|': p.nextc(); nstart, nend := p.concatenation(); + if p.error != nil { + return + } alt := new(_Alt); p.re.add(alt); alt.left = start; @@ -569,11 +589,14 @@ func (re *Regexp) dump() { } } -func (re *Regexp) doParse() { +func (re *Regexp) doParse() os.Error{ p := newParser(re); start := new(_Start); re.add(start); s, e := p.regexp(); + if p.error != nil { + return p.error; + } start.setNext(s); re.start = start; e.setNext(re.add(new(_End))); @@ -584,31 +607,32 @@ func (re *Regexp) doParse() { } re.eliminateNops(); - if debug { re.dump(); println(); } -} - - -func compiler(str string, ch chan *Regexp) { - re := new(Regexp); - re.expr = str; - re.inst = vector.New(0); - re.ch = ch; - re.doParse(); - ch <- re; + return p.error; } // Compile parses a regular expression and returns, if successful, a Regexp // object that can be used to match against text. func Compile(str string) (regexp *Regexp, error os.Error) { - // Compile in a separate goroutine and wait for the result. - ch := make(chan *Regexp); - go compiler(str, ch); - re := <-ch; - return re, re.error + regexp = new(Regexp); + regexp.expr = str; + regexp.inst = vector.New(0); + error = regexp.doParse(); + return; +} + +// MustCompile is like Compile but panics if the expression cannot be parsed. +// It simplifies safe initialization of global variables holding compiled regular +// expressions. +func MustCompile(str string) *Regexp { + regexp, error := Compile(str); + if error != nil { + panicln(`regexp: compiling "`, str, `": `, error); + } + return regexp; } type state struct {