зеркало из https://github.com/golang/build.git
buildlet: fix Exec to return ErrTimeout on timeout
The coordinator relies on Exec reporting that the given timeout was exceeded in order to mark a build as failed instead of retrying it. A refactor resulted in Exec no longer doing that, despite what its documentation promises, so fix that. Also add a test since evidence shows that catching a regression can be helpful. For golang/go#42699. Updates golang/go#35707. Change-Id: Iacef90b83e7b81fad88a33baa6489d5157e3528f Reviewed-on: https://go-review.googlesource.com/c/build/+/407555 Reviewed-by: Carlos Amedee <carlos@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Родитель
a3f1e410a4
Коммит
33d38b8f07
|
@ -509,6 +509,8 @@ type ExecOpts struct {
|
|||
OnStartExec func()
|
||||
}
|
||||
|
||||
// ErrTimeout is a sentinel error that represents that waiting
|
||||
// for a command to complete has exceeded the given timeout.
|
||||
var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
|
||||
|
||||
// Exec runs cmd on the buildlet.
|
||||
|
@ -519,8 +521,8 @@ var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
|
|||
// seen to completition. If execErr is non-nil, the remoteErr is
|
||||
// meaningless.
|
||||
//
|
||||
// If the context's deadline is exceeded, the returned execErr is
|
||||
// ErrTimeout.
|
||||
// If the context's deadline is exceeded while waiting for the command
|
||||
// to complete, the returned execErr is ErrTimeout.
|
||||
func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr, execErr error) {
|
||||
var mode string
|
||||
if opts.SystemLevel {
|
||||
|
@ -553,10 +555,11 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
|||
// (Atlanta, Paris, Sydney, etc.) the reverse buildlet is:
|
||||
res, err := c.doHeaderTimeout(req, 20*time.Second)
|
||||
if err == errHeaderTimeout {
|
||||
// If we don't see headers after all that time,
|
||||
// consider the buildlet to be unhealthy.
|
||||
c.MarkBroken()
|
||||
return nil, errors.New("buildlet: timeout waiting for exec header response")
|
||||
}
|
||||
if err != nil {
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
@ -577,7 +580,7 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
|||
out = ioutil.Discard
|
||||
}
|
||||
if _, err := io.Copy(out, res.Body); err != nil {
|
||||
resc <- errs{execErr: fmt.Errorf("error copying response: %v", err)}
|
||||
resc <- errs{execErr: fmt.Errorf("error copying response: %w", err)}
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -600,10 +603,15 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
|||
select {
|
||||
case res := <-resc:
|
||||
if res.execErr != nil {
|
||||
// Note: We've historically marked the buildlet as unhealthy after
|
||||
// reaching any kind of execution error, even when it's a remote command
|
||||
// execution timeout (see use of ErrTimeout below).
|
||||
// This is certainly on the safer side of avoiding false positive signal,
|
||||
// but maybe someday we'll want to start to rely on the buildlet to report
|
||||
// such a condition and not mark it as unhealthy.
|
||||
|
||||
c.MarkBroken()
|
||||
if res.execErr == context.DeadlineExceeded {
|
||||
// Historical pre-context value.
|
||||
// TODO: update docs & callers to just use the context value.
|
||||
if errors.Is(res.execErr, context.DeadlineExceeded) {
|
||||
res.execErr = ErrTimeout
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,10 +7,12 @@ package buildlet
|
|||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
@ -171,3 +173,55 @@ func createKeyPair(t *testing.T) KeyPair {
|
|||
}
|
||||
return kp
|
||||
}
|
||||
|
||||
// Test that Exec returns ErrTimeout upon reaching the context timeout
|
||||
// during command execution, as its documentation promises.
|
||||
func TestExecTimeoutError(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/status", func(w http.ResponseWriter, req *http.Request) {
|
||||
json.NewEncoder(w).Encode(Status{})
|
||||
})
|
||||
mux.HandleFunc("/exec", func(w http.ResponseWriter, req *http.Request) {
|
||||
w.Write([]byte("."))
|
||||
w.(http.Flusher).Flush() // /exec needs to flush headers right away.
|
||||
<-req.Context().Done() // Simulate that execution hangs, so no more output.
|
||||
})
|
||||
ts := httptest.NewServer(mux)
|
||||
defer ts.Close()
|
||||
u, err := url.Parse(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unable to parse http server url %s", err)
|
||||
}
|
||||
cl := NewClient(u.Host, NoKeyPair)
|
||||
defer cl.Close()
|
||||
|
||||
// Use a custom context that reports context.DeadlineExceeded
|
||||
// after Exec starts command execution. (context.WithTimeout
|
||||
// requires us to select an arbitrary duration, which might
|
||||
// not be long enough or will make the test take too long.)
|
||||
ctx := deadlineOnDemandContext{
|
||||
Context: context.Background(),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
_, execErr := cl.Exec(ctx, "./bin/test", ExecOpts{
|
||||
OnStartExec: func() { close(ctx.done) },
|
||||
})
|
||||
if execErr != ErrTimeout {
|
||||
t.Errorf("cl.Exec error = %v; want %v", execErr, ErrTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
type deadlineOnDemandContext struct {
|
||||
context.Context
|
||||
done chan struct{}
|
||||
}
|
||||
|
||||
func (c deadlineOnDemandContext) Done() <-chan struct{} { return c.done }
|
||||
func (c deadlineOnDemandContext) Err() error {
|
||||
select {
|
||||
default:
|
||||
return nil
|
||||
case <-c.done:
|
||||
return context.DeadlineExceeded
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче