buildletclient: simplify Close

Now Close always means destroy.

The old code & API was a half-baked implementation of a (lack of)
design where there was a difference between being done with a buildlet
(with it possibly being reused by somebody else?) and you wanting to
nuke it completely. Unfortunately this just grew messier and more
broken over time.

This attempts to clean it all up.

We can add the sharing ideas back later when there's actually a design
and implementation. (We're not losing anything with this CL, because
nothing ever shared buildlets)

In fact, this CL fixes a problem where reverse buildlets weren't
getting their underlying net.Conns closed when their healthchecks
failed, leading to 4+ hour (and counting) build hangs due to buildlets
getting killed at inopportune moments (e.g. me testing running a
reverse buildlet on my home mac to help out with the dashboard
backlog)

Change-Id: I07be09f4d5f0f09d35e51e41c48b1296b71bb9b5
Reviewed-on: https://go-review.googlesource.com/14585
Reviewed-by: David Crawshaw <crawshaw@golang.org>
Reviewed-by: Andrew Gerrand <adg@golang.org>
This commit is contained in:
Brad Fitzpatrick 2015-09-15 22:18:37 +00:00 коммит произвёл Brad Fitzpatrick
Родитель 4483ca5674
Коммит 8bad2a8c99
7 изменённых файлов: 50 добавлений и 56 удалений

Просмотреть файл

@ -48,25 +48,38 @@ func (c *Client) setCommon() {
c.peerDead = make(chan struct{})
}
// SetCloseFunc sets a function to be called when c.Close is called.
// SetCloseFunc must not be called concurrently with Close.
func (c *Client) SetCloseFunc(fn func() error) {
c.closeFunc = fn
// SetOnHeartbeatFailure sets a function to be called when heartbeats
// against this builder fail, or when the client is destroyed with
// Close. The function fn is never called more than once.
// SetOnHeartbeatFailure must be set before any use of the buildlet.
func (c *Client) SetOnHeartbeatFailure(fn func()) {
c.heartbeatFailure = fn
}
var ErrClosed = errors.New("buildlet: Client closed")
// Closes destroys and closes down the buildlet, destroying all state
// immediately.
func (c *Client) Close() error {
c.setPeerDead(ErrClosed) // TODO(bradfitz): split concept of closed vs. broken?
var err error
if c.closeFunc != nil {
err = c.closeFunc()
c.closeFunc = nil
}
return err
c.closeOnce.Do(func() {
// Send a best-effort notification to the server to destroy itself.
// Don't want too long (since it's likely in a broken state anyway).
// Ignore the return value, since we're about to forcefully destroy
// it anyway.
req, err := http.NewRequest("POST", c.URL()+"/halt", nil)
if err != nil {
// ignore.
} else {
_, err = c.doHeaderTimeout(req, 2*time.Second)
}
if err == nil {
err = ErrClosed
}
c.setPeerDead(err) // which will also cause c.heartbeatFailure to run
})
return nil
}
// To be called only via c.setPeerDeadOnce.Do(s.setPeerDead)
func (c *Client) setPeerDead(err error) {
c.setPeerDeadOnce.Do(func() {
c.MarkBroken()
@ -112,9 +125,10 @@ type Client struct {
password string // basic auth password or empty for none
remoteBuildlet string // non-empty if for remote buildlets
closeFunc func() error
desc string
heartbeatFailure func() // optional
desc string
closeOnce sync.Once
initHeartbeatOnce sync.Once
setPeerDeadOnce sync.Once
peerDead chan struct{} // closed on peer death
@ -214,17 +228,20 @@ func (c *Client) heartbeatLoop() {
for {
select {
case <-c.peerDead:
// Already dead by something else.
// Most likely: c.Close was called.
// Dead for whatever reason (heartbeat, remote
// side closed, caller Closed
// normally). Regardless, we call the
// heartbeatFailure func if set.
if c.heartbeatFailure != nil {
c.heartbeatFailure()
}
return
case <-time.After(10 * time.Second):
t0 := time.Now()
if _, err := c.Status(); err != nil {
failInARow++
if failInARow == 3 {
c.MarkBroken()
c.setPeerDead(fmt.Errorf("Buildlet %v failed heartbeat after %v; marking dead; err=%v", c, time.Since(t0), err))
return
}
} else {
failInARow = 0
@ -491,15 +508,6 @@ func (c *Client) Exec(cmd string, opts ExecOpts) (remoteErr, execErr error) {
}
}
// Destroy shuts down the buildlet, destroying all state immediately.
func (c *Client) Destroy() error {
req, err := http.NewRequest("POST", c.URL()+"/halt", nil)
if err != nil {
return err
}
return c.doOK(req)
}
// RemoveAll deletes the provided paths, relative to the work directory.
func (c *Client) RemoveAll(paths ...string) error {
if len(paths) == 0 {
@ -516,13 +524,14 @@ func (c *Client) RemoveAll(paths ...string) error {
// DestroyVM shuts down the buildlet and destroys the VM instance.
func (c *Client) DestroyVM(ts oauth2.TokenSource, proj, zone, instance string) error {
// TODO(bradfitz): move GCE stuff out of this package?
gceErrc := make(chan error, 1)
buildletErrc := make(chan error, 1)
go func() {
gceErrc <- DestroyVM(ts, proj, zone, instance)
}()
go func() {
buildletErrc <- c.Destroy()
buildletErrc <- c.Close()
}()
timeout := time.NewTimer(5 * time.Second)
defer timeout.Stop()

Просмотреть файл

@ -1240,7 +1240,6 @@ func (st *buildStatus) build() error {
return fmt.Errorf("failed to get a buildlet: %v", err)
}
defer bc.Close()
defer nukeIfBroken(bc)
st.mu.Lock()
st.bc = bc
st.mu.Unlock()
@ -1966,7 +1965,6 @@ func (st *buildStatus) runTests(helpers <-chan *buildlet.Client) (remoteErr, err
go func(bc *buildlet.Client) {
defer st.logEventTime("closed_helper", bc.Name())
defer bc.Close()
defer nukeIfBroken(bc)
if devPause {
defer time.Sleep(5 * time.Minute)
defer st.logEventTime("DEV_HELPER_SLEEP", bc.Name())
@ -2642,12 +2640,4 @@ func getSourceTgzFromURL(source, repo, rev, urlStr string) (tgz []byte, err erro
return slurp, nil
}
func nukeIfBroken(bc *buildlet.Client) {
if bc.IsBroken() {
// It may not have come from the reverse pool, but it's harmless if
// it didn't.
reversePool.nukeBuildlet(bc)
}
}
var nl = []byte("\n")

Просмотреть файл

@ -266,7 +266,9 @@ func (p *gceBuildletPool) GetBuildlet(cancel Cancel, typ, rev string, el eventTi
return nil, err
}
bc.SetDescription("GCE VM: " + instName)
bc.SetCloseFunc(func() error { return p.putBuildlet(bc, typ, instName) })
bc.SetOnHeartbeatFailure(func() {
p.putBuildlet(bc, typ, instName)
})
return bc, nil
}
@ -274,8 +276,12 @@ func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, typ, instName string)
// TODO(bradfitz): add the buildlet to a freelist (of max N
// items) for up to 10 minutes since when it got started if
// it's never seen a command execution failure, and we can
// wipe all its disk content. (perhaps wipe its disk content when
// it's retrieved, not put back on the freelist)
// wipe all its disk content? (perhaps wipe its disk content
// when it's retrieved, like the reverse buildlet pool) But
// this will require re-introducing a distinction in the
// buildlet client library between Close, Destroy/Halt, and
// tracking execution errors. That was all half-baked before
// and thus removed. Now Close always destroys everything.
deleteVM(projectZone, instName)
p.setInstanceUsed(instName, false)

Просмотреть файл

@ -66,11 +66,6 @@ func addRemoteBuildlet(rb *remoteBuildlet) (name string) {
}
}
func destroyCloseBuildlet(bc *buildlet.Client) {
bc.Destroy()
bc.Close()
}
func expireBuildlets() {
defer cleanTimer.Reset(remoteBuildletCleanInterval)
remoteBuildlets.Lock()
@ -78,7 +73,7 @@ func expireBuildlets() {
now := time.Now()
for name, rb := range remoteBuildlets.m {
if !rb.Expires.IsZero() && rb.Expires.Before(now) {
go destroyCloseBuildlet(rb.buildlet)
go rb.buildlet.Close()
delete(remoteBuildlets.m, name)
}
}
@ -261,7 +256,7 @@ func proxyBuildletHTTP(w http.ResponseWriter, r *http.Request) {
}
if r.Method == "POST" && r.URL.Path == "/halt" {
err := rb.buildlet.Destroy()
err := rb.buildlet.Close()
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}

Просмотреть файл

@ -79,10 +79,6 @@ func (p *reverseBuildletPool) tryToGrab(machineType string) (*buildlet.Client, e
// Found an unused match.
b.inUseAs = machineType
b.inUseTime = time.Now()
b.client.SetCloseFunc(func() error {
p.nukeBuildlet(b.client)
return nil
})
return b.client, nil
}
}
@ -441,6 +437,7 @@ func handleReverse(w http.ResponseWriter, r *http.Request) {
},
})
client.SetDescription(fmt.Sprintf("reverse peer %s/%s for modes %v", hostname, r.RemoteAddr, modes))
client.SetOnHeartbeatFailure(func() { conn.Close() })
tstatus := time.Now()
status, err := client.Status()
if err != nil {

Просмотреть файл

@ -27,5 +27,5 @@ func destroy(args []string) error {
if err != nil {
return err
}
return bc.Destroy()
return bc.Close()
}

Просмотреть файл

@ -216,9 +216,6 @@ func (b *Build) buildlet() (*buildlet.Client, error) {
if err != nil {
return nil, err
}
bc.SetCloseFunc(func() error {
return bc.Destroy()
})
return bc, nil
}