зеркало из https://github.com/CryptoPro/go.git
take goroutines out of regexp so they can be created
during initialization. R=rsc CC=go-dev http://go/go-review/1016023
This commit is contained in:
Родитель
e6f85af2bb
Коммит
acc54b6c70
|
@ -27,7 +27,6 @@ import (
|
|||
"container/vector";
|
||||
"io";
|
||||
"os";
|
||||
"runtime";
|
||||
"utf8";
|
||||
)
|
||||
|
||||
|
@ -72,8 +71,6 @@ func (c *common) setIndex(i int) { c._index = i }
|
|||
// The public interface is entirely through methods.
|
||||
type Regexp struct {
|
||||
expr string; // the original expression
|
||||
ch chan<- *Regexp; // reply channel when we're done
|
||||
error os.Error; // compile- or run-time error; nil if OK
|
||||
inst *vector.Vector;
|
||||
start instr;
|
||||
nbra int; // number of brackets in expression, for subexpressions
|
||||
|
@ -244,13 +241,6 @@ type _Nop struct {
|
|||
func (nop *_Nop) kind() int { return _NOP }
|
||||
func (nop *_Nop) print() { print("nop") }
|
||||
|
||||
// report error and exit compiling/executing goroutine
|
||||
func (re *Regexp) setError(err os.Error) {
|
||||
re.error = err;
|
||||
re.ch <- re;
|
||||
runtime.Goexit();
|
||||
}
|
||||
|
||||
func (re *Regexp) add(i instr) instr {
|
||||
i.setIndex(re.inst.Len());
|
||||
re.inst.Push(i);
|
||||
|
@ -259,6 +249,7 @@ func (re *Regexp) add(i instr) instr {
|
|||
|
||||
type parser struct {
|
||||
re *Regexp;
|
||||
error os.Error;
|
||||
nlpar int; // number of unclosed lpars
|
||||
pos int;
|
||||
ch int;
|
||||
|
@ -288,8 +279,6 @@ func newParser(re *Regexp) *parser {
|
|||
return p;
|
||||
}
|
||||
|
||||
var iNULL instr
|
||||
|
||||
func special(c int) bool {
|
||||
s := `\.+*?()|[]^$`;
|
||||
for i := 0; i < len(s); i++ {
|
||||
|
@ -321,7 +310,8 @@ func (p *parser) charClass() instr {
|
|||
switch c := p.c(); c {
|
||||
case ']', endOfFile:
|
||||
if left >= 0 {
|
||||
p.re.setError(ErrBadRange);
|
||||
p.error = ErrBadRange;
|
||||
return nil;
|
||||
}
|
||||
// Is it [^\n]?
|
||||
if cc.negate && cc.ranges.Len() == 2 &&
|
||||
|
@ -333,18 +323,21 @@ func (p *parser) charClass() instr {
|
|||
p.re.add(cc);
|
||||
return cc;
|
||||
case '-': // do this before backslash processing
|
||||
p.re.setError(ErrBadRange);
|
||||
p.error = ErrBadRange;
|
||||
return nil;
|
||||
case '\\':
|
||||
c = p.nextc();
|
||||
switch {
|
||||
case c == endOfFile:
|
||||
p.re.setError(ErrExtraneousBackslash);
|
||||
p.error = ErrExtraneousBackslash;
|
||||
return nil;
|
||||
case c == 'n':
|
||||
c = '\n';
|
||||
case specialcclass(c):
|
||||
// c is as delivered
|
||||
default:
|
||||
p.re.setError(ErrBadBackslash);
|
||||
p.error = ErrBadBackslash;
|
||||
return nil;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
|
@ -361,26 +354,37 @@ func (p *parser) charClass() instr {
|
|||
cc.addRange(left, c);
|
||||
left = -1;
|
||||
default:
|
||||
p.re.setError(ErrBadRange);
|
||||
p.error = ErrBadRange;
|
||||
return nil;
|
||||
}
|
||||
}
|
||||
}
|
||||
return iNULL
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *parser) term() (start, end instr) {
|
||||
// term() is the leaf of the recursion, so it's sufficient to pick off the
|
||||
// error state here for early exit.
|
||||
// The other functions (closure(), concatenation() etc.) assume
|
||||
// it's safe to recur to here.
|
||||
if p.error != nil {
|
||||
return
|
||||
}
|
||||
switch c := p.c(); c {
|
||||
case '|', endOfFile:
|
||||
return iNULL, iNULL;
|
||||
return nil, nil;
|
||||
case '*', '+':
|
||||
p.re.setError(ErrBareClosure);
|
||||
p.error = ErrBareClosure;
|
||||
return;
|
||||
case ')':
|
||||
if p.nlpar == 0 {
|
||||
p.re.setError(ErrUnmatchedRpar);
|
||||
p.error = ErrUnmatchedRpar;
|
||||
return;
|
||||
}
|
||||
return iNULL, iNULL;
|
||||
return nil, nil;
|
||||
case ']':
|
||||
p.re.setError(ErrUnmatchedRbkt);
|
||||
p.error = ErrUnmatchedRbkt;
|
||||
return;
|
||||
case '^':
|
||||
p.nextc();
|
||||
start = p.re.add(new(_Bot));
|
||||
|
@ -396,8 +400,12 @@ func (p *parser) term() (start, end instr) {
|
|||
case '[':
|
||||
p.nextc();
|
||||
start = p.charClass();
|
||||
if p.error != nil {
|
||||
return;
|
||||
}
|
||||
if p.c() != ']' {
|
||||
p.re.setError(ErrUnmatchedLbkt);
|
||||
p.error = ErrUnmatchedLbkt;
|
||||
return;
|
||||
}
|
||||
p.nextc();
|
||||
return start, start;
|
||||
|
@ -408,7 +416,8 @@ func (p *parser) term() (start, end instr) {
|
|||
nbra := p.re.nbra;
|
||||
start, end = p.regexp();
|
||||
if p.c() != ')' {
|
||||
p.re.setError(ErrUnmatchedLpar);
|
||||
p.error = ErrUnmatchedLpar;
|
||||
return;
|
||||
}
|
||||
p.nlpar--;
|
||||
p.nextc();
|
||||
|
@ -418,9 +427,10 @@ func (p *parser) term() (start, end instr) {
|
|||
p.re.add(ebra);
|
||||
bra.n = nbra;
|
||||
ebra.n = nbra;
|
||||
if start == iNULL {
|
||||
if end == iNULL {
|
||||
p.re.setError(ErrInternal)
|
||||
if start == nil {
|
||||
if end == nil {
|
||||
p.error = ErrInternal;
|
||||
return;
|
||||
}
|
||||
start = ebra
|
||||
} else {
|
||||
|
@ -432,13 +442,15 @@ func (p *parser) term() (start, end instr) {
|
|||
c = p.nextc();
|
||||
switch {
|
||||
case c == endOfFile:
|
||||
p.re.setError(ErrExtraneousBackslash);
|
||||
p.error = ErrExtraneousBackslash;
|
||||
return;
|
||||
case c == 'n':
|
||||
c = '\n';
|
||||
case special(c):
|
||||
// c is as delivered
|
||||
default:
|
||||
p.re.setError(ErrBadBackslash);
|
||||
p.error = ErrBadBackslash;
|
||||
return;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
|
@ -452,7 +464,7 @@ func (p *parser) term() (start, end instr) {
|
|||
|
||||
func (p *parser) closure() (start, end instr) {
|
||||
start, end = p.term();
|
||||
if start == iNULL {
|
||||
if start == nil || p.error != nil {
|
||||
return
|
||||
}
|
||||
switch p.c() {
|
||||
|
@ -487,23 +499,25 @@ func (p *parser) closure() (start, end instr) {
|
|||
}
|
||||
switch p.nextc() {
|
||||
case '*', '+', '?':
|
||||
p.re.setError(ErrBadClosure);
|
||||
p.error = ErrBadClosure;
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (p *parser) concatenation() (start, end instr) {
|
||||
start, end = iNULL, iNULL;
|
||||
for {
|
||||
nstart, nend := p.closure();
|
||||
if p.error != nil {
|
||||
return
|
||||
}
|
||||
switch {
|
||||
case nstart == iNULL: // end of this concatenation
|
||||
if start == iNULL { // this is the empty string
|
||||
case nstart == nil: // end of this concatenation
|
||||
if start == nil { // this is the empty string
|
||||
nop := p.re.add(new(_Nop));
|
||||
return nop, nop;
|
||||
}
|
||||
return;
|
||||
case start == iNULL: // this is first element of concatenation
|
||||
case start == nil: // this is first element of concatenation
|
||||
start, end = nstart, nend;
|
||||
default:
|
||||
end.setNext(nstart);
|
||||
|
@ -515,6 +529,9 @@ func (p *parser) concatenation() (start, end instr) {
|
|||
|
||||
func (p *parser) regexp() (start, end instr) {
|
||||
start, end = p.concatenation();
|
||||
if p.error != nil {
|
||||
return
|
||||
}
|
||||
for {
|
||||
switch p.c() {
|
||||
default:
|
||||
|
@ -522,6 +539,9 @@ func (p *parser) regexp() (start, end instr) {
|
|||
case '|':
|
||||
p.nextc();
|
||||
nstart, nend := p.concatenation();
|
||||
if p.error != nil {
|
||||
return
|
||||
}
|
||||
alt := new(_Alt);
|
||||
p.re.add(alt);
|
||||
alt.left = start;
|
||||
|
@ -569,11 +589,14 @@ func (re *Regexp) dump() {
|
|||
}
|
||||
}
|
||||
|
||||
func (re *Regexp) doParse() {
|
||||
func (re *Regexp) doParse() os.Error{
|
||||
p := newParser(re);
|
||||
start := new(_Start);
|
||||
re.add(start);
|
||||
s, e := p.regexp();
|
||||
if p.error != nil {
|
||||
return p.error;
|
||||
}
|
||||
start.setNext(s);
|
||||
re.start = start;
|
||||
e.setNext(re.add(new(_End)));
|
||||
|
@ -584,31 +607,32 @@ func (re *Regexp) doParse() {
|
|||
}
|
||||
|
||||
re.eliminateNops();
|
||||
|
||||
if debug {
|
||||
re.dump();
|
||||
println();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func compiler(str string, ch chan *Regexp) {
|
||||
re := new(Regexp);
|
||||
re.expr = str;
|
||||
re.inst = vector.New(0);
|
||||
re.ch = ch;
|
||||
re.doParse();
|
||||
ch <- re;
|
||||
return p.error;
|
||||
}
|
||||
|
||||
// Compile parses a regular expression and returns, if successful, a Regexp
|
||||
// object that can be used to match against text.
|
||||
func Compile(str string) (regexp *Regexp, error os.Error) {
|
||||
// Compile in a separate goroutine and wait for the result.
|
||||
ch := make(chan *Regexp);
|
||||
go compiler(str, ch);
|
||||
re := <-ch;
|
||||
return re, re.error
|
||||
regexp = new(Regexp);
|
||||
regexp.expr = str;
|
||||
regexp.inst = vector.New(0);
|
||||
error = regexp.doParse();
|
||||
return;
|
||||
}
|
||||
|
||||
// MustCompile is like Compile but panics if the expression cannot be parsed.
|
||||
// It simplifies safe initialization of global variables holding compiled regular
|
||||
// expressions.
|
||||
func MustCompile(str string) *Regexp {
|
||||
regexp, error := Compile(str);
|
||||
if error != nil {
|
||||
panicln(`regexp: compiling "`, str, `": `, error);
|
||||
}
|
||||
return regexp;
|
||||
}
|
||||
|
||||
type state struct {
|
||||
|
|
Загрузка…
Ссылка в новой задаче