diff --git a/buildlet/buildletclient.go b/buildlet/buildletclient.go index 536dbffc..a3a98322 100644 --- a/buildlet/buildletclient.go +++ b/buildlet/buildletclient.go @@ -48,25 +48,38 @@ func (c *Client) setCommon() { c.peerDead = make(chan struct{}) } -// SetCloseFunc sets a function to be called when c.Close is called. -// SetCloseFunc must not be called concurrently with Close. -func (c *Client) SetCloseFunc(fn func() error) { - c.closeFunc = fn +// SetOnHeartbeatFailure sets a function to be called when heartbeats +// against this builder fail, or when the client is destroyed with +// Close. The function fn is never called more than once. +// SetOnHeartbeatFailure must be set before any use of the buildlet. +func (c *Client) SetOnHeartbeatFailure(fn func()) { + c.heartbeatFailure = fn } var ErrClosed = errors.New("buildlet: Client closed") +// Closes destroys and closes down the buildlet, destroying all state +// immediately. func (c *Client) Close() error { - c.setPeerDead(ErrClosed) // TODO(bradfitz): split concept of closed vs. broken? - var err error - if c.closeFunc != nil { - err = c.closeFunc() - c.closeFunc = nil - } - return err + c.closeOnce.Do(func() { + // Send a best-effort notification to the server to destroy itself. + // Don't want too long (since it's likely in a broken state anyway). + // Ignore the return value, since we're about to forcefully destroy + // it anyway. + req, err := http.NewRequest("POST", c.URL()+"/halt", nil) + if err != nil { + // ignore. + } else { + _, err = c.doHeaderTimeout(req, 2*time.Second) + } + if err == nil { + err = ErrClosed + } + c.setPeerDead(err) // which will also cause c.heartbeatFailure to run + }) + return nil } -// To be called only via c.setPeerDeadOnce.Do(s.setPeerDead) func (c *Client) setPeerDead(err error) { c.setPeerDeadOnce.Do(func() { c.MarkBroken() @@ -112,9 +125,10 @@ type Client struct { password string // basic auth password or empty for none remoteBuildlet string // non-empty if for remote buildlets - closeFunc func() error - desc string + heartbeatFailure func() // optional + desc string + closeOnce sync.Once initHeartbeatOnce sync.Once setPeerDeadOnce sync.Once peerDead chan struct{} // closed on peer death @@ -214,17 +228,20 @@ func (c *Client) heartbeatLoop() { for { select { case <-c.peerDead: - // Already dead by something else. - // Most likely: c.Close was called. + // Dead for whatever reason (heartbeat, remote + // side closed, caller Closed + // normally). Regardless, we call the + // heartbeatFailure func if set. + if c.heartbeatFailure != nil { + c.heartbeatFailure() + } return case <-time.After(10 * time.Second): t0 := time.Now() if _, err := c.Status(); err != nil { failInARow++ if failInARow == 3 { - c.MarkBroken() c.setPeerDead(fmt.Errorf("Buildlet %v failed heartbeat after %v; marking dead; err=%v", c, time.Since(t0), err)) - return } } else { failInARow = 0 @@ -491,15 +508,6 @@ func (c *Client) Exec(cmd string, opts ExecOpts) (remoteErr, execErr error) { } } -// Destroy shuts down the buildlet, destroying all state immediately. -func (c *Client) Destroy() error { - req, err := http.NewRequest("POST", c.URL()+"/halt", nil) - if err != nil { - return err - } - return c.doOK(req) -} - // RemoveAll deletes the provided paths, relative to the work directory. func (c *Client) RemoveAll(paths ...string) error { if len(paths) == 0 { @@ -516,13 +524,14 @@ func (c *Client) RemoveAll(paths ...string) error { // DestroyVM shuts down the buildlet and destroys the VM instance. func (c *Client) DestroyVM(ts oauth2.TokenSource, proj, zone, instance string) error { + // TODO(bradfitz): move GCE stuff out of this package? gceErrc := make(chan error, 1) buildletErrc := make(chan error, 1) go func() { gceErrc <- DestroyVM(ts, proj, zone, instance) }() go func() { - buildletErrc <- c.Destroy() + buildletErrc <- c.Close() }() timeout := time.NewTimer(5 * time.Second) defer timeout.Stop() diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go index a3447141..4a79e52d 100644 --- a/cmd/coordinator/coordinator.go +++ b/cmd/coordinator/coordinator.go @@ -1240,7 +1240,6 @@ func (st *buildStatus) build() error { return fmt.Errorf("failed to get a buildlet: %v", err) } defer bc.Close() - defer nukeIfBroken(bc) st.mu.Lock() st.bc = bc st.mu.Unlock() @@ -1966,7 +1965,6 @@ func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err go func(bc *buildlet.Client) { defer st.logEventTime("closed_helper", bc.Name()) defer bc.Close() - defer nukeIfBroken(bc) if devPause { defer time.Sleep(5 * time.Minute) defer st.logEventTime("DEV_HELPER_SLEEP", bc.Name()) @@ -2642,12 +2640,4 @@ func getSourceTgzFromURL(source, repo, rev, urlStr string) (tgz []byte, err erro return slurp, nil } -func nukeIfBroken(bc *buildlet.Client) { - if bc.IsBroken() { - // It may not have come from the reverse pool, but it's harmless if - // it didn't. - reversePool.nukeBuildlet(bc) - } -} - var nl = []byte("\n") diff --git a/cmd/coordinator/gce.go b/cmd/coordinator/gce.go index fc75a02f..a2b87d4d 100644 --- a/cmd/coordinator/gce.go +++ b/cmd/coordinator/gce.go @@ -266,7 +266,9 @@ func (p *gceBuildletPool) GetBuildlet(cancel Cancel, typ, rev string, el eventTi return nil, err } bc.SetDescription("GCE VM: " + instName) - bc.SetCloseFunc(func() error { return p.putBuildlet(bc, typ, instName) }) + bc.SetOnHeartbeatFailure(func() { + p.putBuildlet(bc, typ, instName) + }) return bc, nil } @@ -274,8 +276,12 @@ func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, typ, instName string) // TODO(bradfitz): add the buildlet to a freelist (of max N // items) for up to 10 minutes since when it got started if // it's never seen a command execution failure, and we can - // wipe all its disk content. (perhaps wipe its disk content when - // it's retrieved, not put back on the freelist) + // wipe all its disk content? (perhaps wipe its disk content + // when it's retrieved, like the reverse buildlet pool) But + // this will require re-introducing a distinction in the + // buildlet client library between Close, Destroy/Halt, and + // tracking execution errors. That was all half-baked before + // and thus removed. Now Close always destroys everything. deleteVM(projectZone, instName) p.setInstanceUsed(instName, false) diff --git a/cmd/coordinator/remote.go b/cmd/coordinator/remote.go index bf6b1f59..41dd59d6 100644 --- a/cmd/coordinator/remote.go +++ b/cmd/coordinator/remote.go @@ -66,11 +66,6 @@ func addRemoteBuildlet(rb *remoteBuildlet) (name string) { } } -func destroyCloseBuildlet(bc *buildlet.Client) { - bc.Destroy() - bc.Close() -} - func expireBuildlets() { defer cleanTimer.Reset(remoteBuildletCleanInterval) remoteBuildlets.Lock() @@ -78,7 +73,7 @@ func expireBuildlets() { now := time.Now() for name, rb := range remoteBuildlets.m { if !rb.Expires.IsZero() && rb.Expires.Before(now) { - go destroyCloseBuildlet(rb.buildlet) + go rb.buildlet.Close() delete(remoteBuildlets.m, name) } } @@ -261,7 +256,7 @@ func proxyBuildletHTTP(w http.ResponseWriter, r *http.Request) { } if r.Method == "POST" && r.URL.Path == "/halt" { - err := rb.buildlet.Destroy() + err := rb.buildlet.Close() if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } diff --git a/cmd/coordinator/reverse.go b/cmd/coordinator/reverse.go index 96e79537..36bd9af6 100644 --- a/cmd/coordinator/reverse.go +++ b/cmd/coordinator/reverse.go @@ -79,10 +79,6 @@ func (p *reverseBuildletPool) tryToGrab(machineType string) (*buildlet.Client, e // Found an unused match. b.inUseAs = machineType b.inUseTime = time.Now() - b.client.SetCloseFunc(func() error { - p.nukeBuildlet(b.client) - return nil - }) return b.client, nil } } @@ -441,6 +437,7 @@ func handleReverse(w http.ResponseWriter, r *http.Request) { }, }) client.SetDescription(fmt.Sprintf("reverse peer %s/%s for modes %v", hostname, r.RemoteAddr, modes)) + client.SetOnHeartbeatFailure(func() { conn.Close() }) tstatus := time.Now() status, err := client.Status() if err != nil { diff --git a/cmd/gomote/destroy.go b/cmd/gomote/destroy.go index 27a2bf2e..9fbc0161 100644 --- a/cmd/gomote/destroy.go +++ b/cmd/gomote/destroy.go @@ -27,5 +27,5 @@ func destroy(args []string) error { if err != nil { return err } - return bc.Destroy() + return bc.Close() } diff --git a/cmd/release/release.go b/cmd/release/release.go index df046220..5b8fc7db 100644 --- a/cmd/release/release.go +++ b/cmd/release/release.go @@ -216,9 +216,6 @@ func (b *Build) buildlet() (*buildlet.Client, error) { if err != nil { return nil, err } - bc.SetCloseFunc(func() error { - return bc.Destroy() - }) return bc, nil }