buildlet: fix Exec to return ErrTimeout on timeout

The coordinator relies on Exec reporting that the given timeout was
exceeded in order to mark a build as failed instead of retrying it.
A refactor resulted in Exec no longer doing that, despite what its
documentation promises, so fix that.

Also add a test since evidence shows that catching a regression can
be helpful.

For golang/go#42699.
Updates golang/go#35707.

Change-Id: Iacef90b83e7b81fad88a33baa6489d5157e3528f
Reviewed-on: https://go-review.googlesource.com/c/build/+/407555
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Dmitri Shuralyov 2022-05-20 13:43:07 -04:00 коммит произвёл Gopher Robot
Родитель a3f1e410a4
Коммит 33d38b8f07
2 изменённых файлов: 70 добавлений и 8 удалений

Просмотреть файл

@ -509,6 +509,8 @@ type ExecOpts struct {
OnStartExec func()
}
// ErrTimeout is a sentinel error that represents that waiting
// for a command to complete has exceeded the given timeout.
var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
// Exec runs cmd on the buildlet.
@ -519,8 +521,8 @@ var ErrTimeout = errors.New("buildlet: timeout waiting for command to complete")
// seen to completition. If execErr is non-nil, the remoteErr is
// meaningless.
//
// If the context's deadline is exceeded, the returned execErr is
// ErrTimeout.
// If the context's deadline is exceeded while waiting for the command
// to complete, the returned execErr is ErrTimeout.
func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr, execErr error) {
var mode string
if opts.SystemLevel {
@ -553,10 +555,11 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
// (Atlanta, Paris, Sydney, etc.) the reverse buildlet is:
res, err := c.doHeaderTimeout(req, 20*time.Second)
if err == errHeaderTimeout {
// If we don't see headers after all that time,
// consider the buildlet to be unhealthy.
c.MarkBroken()
return nil, errors.New("buildlet: timeout waiting for exec header response")
}
if err != nil {
} else if err != nil {
return nil, err
}
defer res.Body.Close()
@ -577,7 +580,7 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
out = ioutil.Discard
}
if _, err := io.Copy(out, res.Body); err != nil {
resc <- errs{execErr: fmt.Errorf("error copying response: %v", err)}
resc <- errs{execErr: fmt.Errorf("error copying response: %w", err)}
return
}
@ -600,10 +603,15 @@ func (c *client) Exec(ctx context.Context, cmd string, opts ExecOpts) (remoteErr
select {
case res := <-resc:
if res.execErr != nil {
// Note: We've historically marked the buildlet as unhealthy after
// reaching any kind of execution error, even when it's a remote command
// execution timeout (see use of ErrTimeout below).
// This is certainly on the safer side of avoiding false positive signal,
// but maybe someday we'll want to start to rely on the buildlet to report
// such a condition and not mark it as unhealthy.
c.MarkBroken()
if res.execErr == context.DeadlineExceeded {
// Historical pre-context value.
// TODO: update docs & callers to just use the context value.
if errors.Is(res.execErr, context.DeadlineExceeded) {
res.execErr = ErrTimeout
}
}

Просмотреть файл

@ -7,10 +7,12 @@ package buildlet
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"net"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
)
@ -171,3 +173,55 @@ func createKeyPair(t *testing.T) KeyPair {
}
return kp
}
// Test that Exec returns ErrTimeout upon reaching the context timeout
// during command execution, as its documentation promises.
func TestExecTimeoutError(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/status", func(w http.ResponseWriter, req *http.Request) {
json.NewEncoder(w).Encode(Status{})
})
mux.HandleFunc("/exec", func(w http.ResponseWriter, req *http.Request) {
w.Write([]byte("."))
w.(http.Flusher).Flush() // /exec needs to flush headers right away.
<-req.Context().Done() // Simulate that execution hangs, so no more output.
})
ts := httptest.NewServer(mux)
defer ts.Close()
u, err := url.Parse(ts.URL)
if err != nil {
t.Fatalf("unable to parse http server url %s", err)
}
cl := NewClient(u.Host, NoKeyPair)
defer cl.Close()
// Use a custom context that reports context.DeadlineExceeded
// after Exec starts command execution. (context.WithTimeout
// requires us to select an arbitrary duration, which might
// not be long enough or will make the test take too long.)
ctx := deadlineOnDemandContext{
Context: context.Background(),
done: make(chan struct{}),
}
_, execErr := cl.Exec(ctx, "./bin/test", ExecOpts{
OnStartExec: func() { close(ctx.done) },
})
if execErr != ErrTimeout {
t.Errorf("cl.Exec error = %v; want %v", execErr, ErrTimeout)
}
}
type deadlineOnDemandContext struct {
context.Context
done chan struct{}
}
func (c deadlineOnDemandContext) Done() <-chan struct{} { return c.done }
func (c deadlineOnDemandContext) Err() error {
select {
default:
return nil
case <-c.done:
return context.DeadlineExceeded
}
}