зеркало из https://github.com/golang/build.git
buildletclient: simplify Close
Now Close always means destroy. The old code & API was a half-baked implementation of a (lack of) design where there was a difference between being done with a buildlet (with it possibly being reused by somebody else?) and you wanting to nuke it completely. Unfortunately this just grew messier and more broken over time. This attempts to clean it all up. We can add the sharing ideas back later when there's actually a design and implementation. (We're not losing anything with this CL, because nothing ever shared buildlets) In fact, this CL fixes a problem where reverse buildlets weren't getting their underlying net.Conns closed when their healthchecks failed, leading to 4+ hour (and counting) build hangs due to buildlets getting killed at inopportune moments (e.g. me testing running a reverse buildlet on my home mac to help out with the dashboard backlog) Change-Id: I07be09f4d5f0f09d35e51e41c48b1296b71bb9b5 Reviewed-on: https://go-review.googlesource.com/14585 Reviewed-by: David Crawshaw <crawshaw@golang.org> Reviewed-by: Andrew Gerrand <adg@golang.org>
This commit is contained in:
Родитель
4483ca5674
Коммит
8bad2a8c99
|
@ -48,25 +48,38 @@ func (c *Client) setCommon() {
|
|||
c.peerDead = make(chan struct{})
|
||||
}
|
||||
|
||||
// SetCloseFunc sets a function to be called when c.Close is called.
|
||||
// SetCloseFunc must not be called concurrently with Close.
|
||||
func (c *Client) SetCloseFunc(fn func() error) {
|
||||
c.closeFunc = fn
|
||||
// SetOnHeartbeatFailure sets a function to be called when heartbeats
|
||||
// against this builder fail, or when the client is destroyed with
|
||||
// Close. The function fn is never called more than once.
|
||||
// SetOnHeartbeatFailure must be set before any use of the buildlet.
|
||||
func (c *Client) SetOnHeartbeatFailure(fn func()) {
|
||||
c.heartbeatFailure = fn
|
||||
}
|
||||
|
||||
var ErrClosed = errors.New("buildlet: Client closed")
|
||||
|
||||
// Closes destroys and closes down the buildlet, destroying all state
|
||||
// immediately.
|
||||
func (c *Client) Close() error {
|
||||
c.setPeerDead(ErrClosed) // TODO(bradfitz): split concept of closed vs. broken?
|
||||
var err error
|
||||
if c.closeFunc != nil {
|
||||
err = c.closeFunc()
|
||||
c.closeFunc = nil
|
||||
}
|
||||
return err
|
||||
c.closeOnce.Do(func() {
|
||||
// Send a best-effort notification to the server to destroy itself.
|
||||
// Don't want too long (since it's likely in a broken state anyway).
|
||||
// Ignore the return value, since we're about to forcefully destroy
|
||||
// it anyway.
|
||||
req, err := http.NewRequest("POST", c.URL()+"/halt", nil)
|
||||
if err != nil {
|
||||
// ignore.
|
||||
} else {
|
||||
_, err = c.doHeaderTimeout(req, 2*time.Second)
|
||||
}
|
||||
if err == nil {
|
||||
err = ErrClosed
|
||||
}
|
||||
c.setPeerDead(err) // which will also cause c.heartbeatFailure to run
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
// To be called only via c.setPeerDeadOnce.Do(s.setPeerDead)
|
||||
func (c *Client) setPeerDead(err error) {
|
||||
c.setPeerDeadOnce.Do(func() {
|
||||
c.MarkBroken()
|
||||
|
@ -112,9 +125,10 @@ type Client struct {
|
|||
password string // basic auth password or empty for none
|
||||
remoteBuildlet string // non-empty if for remote buildlets
|
||||
|
||||
closeFunc func() error
|
||||
desc string
|
||||
heartbeatFailure func() // optional
|
||||
desc string
|
||||
|
||||
closeOnce sync.Once
|
||||
initHeartbeatOnce sync.Once
|
||||
setPeerDeadOnce sync.Once
|
||||
peerDead chan struct{} // closed on peer death
|
||||
|
@ -214,17 +228,20 @@ func (c *Client) heartbeatLoop() {
|
|||
for {
|
||||
select {
|
||||
case <-c.peerDead:
|
||||
// Already dead by something else.
|
||||
// Most likely: c.Close was called.
|
||||
// Dead for whatever reason (heartbeat, remote
|
||||
// side closed, caller Closed
|
||||
// normally). Regardless, we call the
|
||||
// heartbeatFailure func if set.
|
||||
if c.heartbeatFailure != nil {
|
||||
c.heartbeatFailure()
|
||||
}
|
||||
return
|
||||
case <-time.After(10 * time.Second):
|
||||
t0 := time.Now()
|
||||
if _, err := c.Status(); err != nil {
|
||||
failInARow++
|
||||
if failInARow == 3 {
|
||||
c.MarkBroken()
|
||||
c.setPeerDead(fmt.Errorf("Buildlet %v failed heartbeat after %v; marking dead; err=%v", c, time.Since(t0), err))
|
||||
return
|
||||
}
|
||||
} else {
|
||||
failInARow = 0
|
||||
|
@ -491,15 +508,6 @@ func (c *Client) Exec(cmd string, opts ExecOpts) (remoteErr, execErr error) {
|
|||
}
|
||||
}
|
||||
|
||||
// Destroy shuts down the buildlet, destroying all state immediately.
|
||||
func (c *Client) Destroy() error {
|
||||
req, err := http.NewRequest("POST", c.URL()+"/halt", nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.doOK(req)
|
||||
}
|
||||
|
||||
// RemoveAll deletes the provided paths, relative to the work directory.
|
||||
func (c *Client) RemoveAll(paths ...string) error {
|
||||
if len(paths) == 0 {
|
||||
|
@ -516,13 +524,14 @@ func (c *Client) RemoveAll(paths ...string) error {
|
|||
|
||||
// DestroyVM shuts down the buildlet and destroys the VM instance.
|
||||
func (c *Client) DestroyVM(ts oauth2.TokenSource, proj, zone, instance string) error {
|
||||
// TODO(bradfitz): move GCE stuff out of this package?
|
||||
gceErrc := make(chan error, 1)
|
||||
buildletErrc := make(chan error, 1)
|
||||
go func() {
|
||||
gceErrc <- DestroyVM(ts, proj, zone, instance)
|
||||
}()
|
||||
go func() {
|
||||
buildletErrc <- c.Destroy()
|
||||
buildletErrc <- c.Close()
|
||||
}()
|
||||
timeout := time.NewTimer(5 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
|
|
@ -1240,7 +1240,6 @@ func (st *buildStatus) build() error {
|
|||
return fmt.Errorf("failed to get a buildlet: %v", err)
|
||||
}
|
||||
defer bc.Close()
|
||||
defer nukeIfBroken(bc)
|
||||
st.mu.Lock()
|
||||
st.bc = bc
|
||||
st.mu.Unlock()
|
||||
|
@ -1966,7 +1965,6 @@ func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err
|
|||
go func(bc *buildlet.Client) {
|
||||
defer st.logEventTime("closed_helper", bc.Name())
|
||||
defer bc.Close()
|
||||
defer nukeIfBroken(bc)
|
||||
if devPause {
|
||||
defer time.Sleep(5 * time.Minute)
|
||||
defer st.logEventTime("DEV_HELPER_SLEEP", bc.Name())
|
||||
|
@ -2642,12 +2640,4 @@ func getSourceTgzFromURL(source, repo, rev, urlStr string) (tgz []byte, err erro
|
|||
return slurp, nil
|
||||
}
|
||||
|
||||
func nukeIfBroken(bc *buildlet.Client) {
|
||||
if bc.IsBroken() {
|
||||
// It may not have come from the reverse pool, but it's harmless if
|
||||
// it didn't.
|
||||
reversePool.nukeBuildlet(bc)
|
||||
}
|
||||
}
|
||||
|
||||
var nl = []byte("\n")
|
||||
|
|
|
@ -266,7 +266,9 @@ func (p *gceBuildletPool) GetBuildlet(cancel Cancel, typ, rev string, el eventTi
|
|||
return nil, err
|
||||
}
|
||||
bc.SetDescription("GCE VM: " + instName)
|
||||
bc.SetCloseFunc(func() error { return p.putBuildlet(bc, typ, instName) })
|
||||
bc.SetOnHeartbeatFailure(func() {
|
||||
p.putBuildlet(bc, typ, instName)
|
||||
})
|
||||
return bc, nil
|
||||
}
|
||||
|
||||
|
@ -274,8 +276,12 @@ func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, typ, instName string)
|
|||
// TODO(bradfitz): add the buildlet to a freelist (of max N
|
||||
// items) for up to 10 minutes since when it got started if
|
||||
// it's never seen a command execution failure, and we can
|
||||
// wipe all its disk content. (perhaps wipe its disk content when
|
||||
// it's retrieved, not put back on the freelist)
|
||||
// wipe all its disk content? (perhaps wipe its disk content
|
||||
// when it's retrieved, like the reverse buildlet pool) But
|
||||
// this will require re-introducing a distinction in the
|
||||
// buildlet client library between Close, Destroy/Halt, and
|
||||
// tracking execution errors. That was all half-baked before
|
||||
// and thus removed. Now Close always destroys everything.
|
||||
deleteVM(projectZone, instName)
|
||||
p.setInstanceUsed(instName, false)
|
||||
|
||||
|
|
|
@ -66,11 +66,6 @@ func addRemoteBuildlet(rb *remoteBuildlet) (name string) {
|
|||
}
|
||||
}
|
||||
|
||||
func destroyCloseBuildlet(bc *buildlet.Client) {
|
||||
bc.Destroy()
|
||||
bc.Close()
|
||||
}
|
||||
|
||||
func expireBuildlets() {
|
||||
defer cleanTimer.Reset(remoteBuildletCleanInterval)
|
||||
remoteBuildlets.Lock()
|
||||
|
@ -78,7 +73,7 @@ func expireBuildlets() {
|
|||
now := time.Now()
|
||||
for name, rb := range remoteBuildlets.m {
|
||||
if !rb.Expires.IsZero() && rb.Expires.Before(now) {
|
||||
go destroyCloseBuildlet(rb.buildlet)
|
||||
go rb.buildlet.Close()
|
||||
delete(remoteBuildlets.m, name)
|
||||
}
|
||||
}
|
||||
|
@ -261,7 +256,7 @@ func proxyBuildletHTTP(w http.ResponseWriter, r *http.Request) {
|
|||
}
|
||||
|
||||
if r.Method == "POST" && r.URL.Path == "/halt" {
|
||||
err := rb.buildlet.Destroy()
|
||||
err := rb.buildlet.Close()
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
|
|
|
@ -79,10 +79,6 @@ func (p *reverseBuildletPool) tryToGrab(machineType string) (*buildlet.Client, e
|
|||
// Found an unused match.
|
||||
b.inUseAs = machineType
|
||||
b.inUseTime = time.Now()
|
||||
b.client.SetCloseFunc(func() error {
|
||||
p.nukeBuildlet(b.client)
|
||||
return nil
|
||||
})
|
||||
return b.client, nil
|
||||
}
|
||||
}
|
||||
|
@ -441,6 +437,7 @@ func handleReverse(w http.ResponseWriter, r *http.Request) {
|
|||
},
|
||||
})
|
||||
client.SetDescription(fmt.Sprintf("reverse peer %s/%s for modes %v", hostname, r.RemoteAddr, modes))
|
||||
client.SetOnHeartbeatFailure(func() { conn.Close() })
|
||||
tstatus := time.Now()
|
||||
status, err := client.Status()
|
||||
if err != nil {
|
||||
|
|
|
@ -27,5 +27,5 @@ func destroy(args []string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return bc.Destroy()
|
||||
return bc.Close()
|
||||
}
|
||||
|
|
|
@ -216,9 +216,6 @@ func (b *Build) buildlet() (*buildlet.Client, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bc.SetCloseFunc(func() error {
|
||||
return bc.Destroy()
|
||||
})
|
||||
return bc, nil
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче