зеркало из https://github.com/golang/build.git
buildlet: fix Exec to return ErrTimeout on timeout
The coordinator relies on Exec reporting that the given timeout was exceeded in order to mark a build as failed instead of retrying it. A refactor resulted in Exec no longer doing that, despite what its documentation promises, so fix that.

Also add a test since evidence shows that catching a regression can be helpful.

For golang/go#42699.
Updates golang/go#35707.

Change-Id: Iacef90b83e7b81fad88a33baa6489d5157e3528f
Reviewed-on: https://go-review.googlesource.com/c/build/+/407555
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Родитель
a3f1e410a4
Коммит
33d38b8f07
|
@ -509,6 +509,8 @@ type ExecOpts struct {
|
||||||
OnStartExec func()
|
OnStartExec func()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrTimeout is a sentinel error that represents that waiting
|
||||||
|
// for a command to complete has exceeded the given timeout.
|
||||||
var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
|
var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
|
||||||
|
|
||||||
// Exec runs cmd on the buildlet.
|
// Exec runs cmd on the buildlet.
|
||||||
|
@ -519,8 +521,8 @@ var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
|
||||||
// seen to completion. If execErr is non-nil, the remoteErr is
|
// seen to completion. If execErr is non-nil, the remoteErr is
|
||||||
// meaningless.
|
// meaningless.
|
||||||
//
|
//
|
||||||
// If the context's deadline is exceeded, the returned execErr is
|
// If the context's deadline is exceeded while waiting for the command
|
||||||
// ErrTimeout.
|
// to complete, the returned execErr is ErrTimeout.
|
||||||
func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr, execErr error) {
|
func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr, execErr error) {
|
||||||
var mode string
|
var mode string
|
||||||
if opts.SystemLevel {
|
if opts.SystemLevel {
|
||||||
|
@ -553,10 +555,11 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
||||||
// (Atlanta, Paris, Sydney, etc.) the reverse buildlet is:
|
// (Atlanta, Paris, Sydney, etc.) the reverse buildlet is:
|
||||||
res, err := c.doHeaderTimeout(req, 20*time.Second)
|
res, err := c.doHeaderTimeout(req, 20*time.Second)
|
||||||
if err == errHeaderTimeout {
|
if err == errHeaderTimeout {
|
||||||
|
// If we don't see headers after all that time,
|
||||||
|
// consider the buildlet to be unhealthy.
|
||||||
c.MarkBroken()
|
c.MarkBroken()
|
||||||
return nil, errors.New("buildlet: timeout waiting for exec header response")
|
return nil, errors.New("buildlet: timeout waiting for exec header response")
|
||||||
}
|
} else if err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
@ -577,7 +580,7 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
||||||
out = ioutil.Discard
|
out = ioutil.Discard
|
||||||
}
|
}
|
||||||
if _, err := io.Copy(out, res.Body); err != nil {
|
if _, err := io.Copy(out, res.Body); err != nil {
|
||||||
resc <- errs{execErr: fmt.Errorf("error copying response: %v", err)}
|
resc <- errs{execErr: fmt.Errorf("error copying response: %w", err)}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -600,10 +603,15 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
|
||||||
select {
|
select {
|
||||||
case res := <-resc:
|
case res := <-resc:
|
||||||
if res.execErr != nil {
|
if res.execErr != nil {
|
||||||
|
// Note: We've historically marked the buildlet as unhealthy after
|
||||||
|
// reaching any kind of execution error, even when it's a remote command
|
||||||
|
// execution timeout (see use of ErrTimeout below).
|
||||||
|
// This is certainly on the safer side of avoiding false positive signal,
|
||||||
|
// but maybe someday we'll want to start to rely on the buildlet to report
|
||||||
|
// such a condition and not mark it as unhealthy.
|
||||||
|
|
||||||
c.MarkBroken()
|
c.MarkBroken()
|
||||||
if res.execErr == context.DeadlineExceeded {
|
if errors.Is(res.execErr, context.DeadlineExceeded) {
|
||||||
// Historical pre-context value.
|
|
||||||
// TODO: update docs & callers to just use the context value.
|
|
||||||
res.execErr = ErrTimeout
|
res.execErr = ErrTimeout
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,12 @@ package buildlet
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
@ -171,3 +173,55 @@ func createKeyPair(t *testing.T) KeyPair {
|
||||||
}
|
}
|
||||||
return kp
|
return kp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test that Exec returns ErrTimeout upon reaching the context timeout
|
||||||
|
// during command execution, as its documentation promises.
|
||||||
|
func TestExecTimeoutError(t *testing.T) {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
mux.HandleFunc("/status", func(w http.ResponseWriter, req *http.Request) {
|
||||||
|
json.NewEncoder(w).Encode(Status{})
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/exec", func(w http.ResponseWriter, req *http.Request) {
|
||||||
|
w.Write([]byte("."))
|
||||||
|
w.(http.Flusher).Flush() // /exec needs to flush headers right away.
|
||||||
|
<-req.Context().Done() // Simulate that execution hangs, so no more output.
|
||||||
|
})
|
||||||
|
ts := httptest.NewServer(mux)
|
||||||
|
defer ts.Close()
|
||||||
|
u, err := url.Parse(ts.URL)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unable to parse http server url %s", err)
|
||||||
|
}
|
||||||
|
cl := NewClient(u.Host, NoKeyPair)
|
||||||
|
defer cl.Close()
|
||||||
|
|
||||||
|
// Use a custom context that reports context.DeadlineExceeded
|
||||||
|
// after Exec starts command execution. (context.WithTimeout
|
||||||
|
// requires us to select an arbitrary duration, which might
|
||||||
|
// not be long enough or will make the test take too long.)
|
||||||
|
ctx := deadlineOnDemandContext{
|
||||||
|
Context: context.Background(),
|
||||||
|
done: make(chan struct{}),
|
||||||
|
}
|
||||||
|
_, execErr := cl.Exec(ctx, "./bin/test", ExecOpts{
|
||||||
|
OnStartExec: func() { close(ctx.done) },
|
||||||
|
})
|
||||||
|
if execErr != ErrTimeout {
|
||||||
|
t.Errorf("cl.Exec error = %v; want %v", execErr, ErrTimeout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type deadlineOnDemandContext struct {
|
||||||
|
context.Context
|
||||||
|
done chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c deadlineOnDemandContext) Done() <-chan struct{} { return c.done }
|
||||||
|
func (c deadlineOnDemandContext) Err() error {
|
||||||
|
select {
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
case <-c.done:
|
||||||
|
return context.DeadlineExceeded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче