зеркало из https://github.com/golang/build.git
internal/relui/sign: improve communication timing visibility
I expect successful communication with a signing client to be on the order of a few seconds, maybe 10 or 20. Some signing status requests are stalling for 10 minutes. In such cases, it's better to give up earlier and try again, so cap individual status requests inside AwaitCondition to a minute at most, and prefer to try again if that doesn't work out. This should prevent the watchdog timer from reaching 10 minutes and stopping the task. Also improve logging to print more useful timing information, and fix a case where UpdateSigningStatus was always returning nil error. For golang/go#53632. Change-Id: Idb340fb6396b31570acd980adf33a52877d60012 Reviewed-on: https://go-review.googlesource.com/c/build/+/444495 Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Heschi Kreinick <heschi@google.com> Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Родитель
e987ab40b4
Коммит
14a42abee7
|
@ -10,6 +10,7 @@ import (
|
|||
"io"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"golang.org/x/build/internal/access"
|
||||
|
@ -49,10 +50,14 @@ func NewServer() *SigningServer {
|
|||
// and receive status updates on signing requests. There is no specific order which the requests or responses
|
||||
// need to occur in. The connection returns an error once the context is canceled or an error is encountered.
|
||||
func (rs *SigningServer) UpdateSigningStatus(stream protos.ReleaseService_UpdateSigningStatusServer) error {
|
||||
_, err := access.IAPFromContext(stream.Context())
|
||||
iap, err := access.IAPFromContext(stream.Context())
|
||||
if err != nil {
|
||||
return status.Errorf(codes.Unauthenticated, "request does not contain the required authentication")
|
||||
}
|
||||
|
||||
log.Printf("SigningServer: a client connected (iap = %+v)\n", iap)
|
||||
defer log.Println("SigningServer: a client disconnected")
|
||||
|
||||
g, ctx := errgroup.WithContext(stream.Context())
|
||||
g.Go(func() error {
|
||||
for {
|
||||
|
@ -79,7 +84,7 @@ func (rs *SigningServer) UpdateSigningStatus(stream protos.ReleaseService_Update
|
|||
rs.callAndDeregister(resp.GetMessageId(), &signResponse{status: resp})
|
||||
}
|
||||
})
|
||||
if err := g.Wait(); err == nil {
|
||||
if err := g.Wait(); err != nil {
|
||||
log.Printf("SigningServer: UpdateSigningStatus=%s", err)
|
||||
return err
|
||||
}
|
||||
|
@ -88,7 +93,16 @@ func (rs *SigningServer) UpdateSigningStatus(stream protos.ReleaseService_Update
|
|||
|
||||
// do sends a signing request and returns the corresponding signing response.
|
||||
// It blocks until a response is received or the context times out or is canceled.
|
||||
func (rs *SigningServer) do(ctx context.Context, req *protos.SigningRequest) (*protos.SigningStatus, error) {
|
||||
func (rs *SigningServer) do(ctx context.Context, req *protos.SigningRequest) (resp *protos.SigningStatus, err error) {
|
||||
t := time.Now()
|
||||
defer func() {
|
||||
if err == nil {
|
||||
log.Printf("SigningServer: successfully round-tripped message=%q in %v:\n req = %v\n resp = %v\n", req.GetMessageId(), time.Since(t), req, resp)
|
||||
} else {
|
||||
log.Printf("SigningServer: communication error %v for message=%q after %v:\n req = %v\n", err, req.GetMessageId(), time.Since(t), req)
|
||||
}
|
||||
}()
|
||||
|
||||
// Register where to send the response for this message ID.
|
||||
respCh := make(chan *signResponse, 1) // Room for one response.
|
||||
rs.register(req.GetMessageId(), func(r *signResponse) { respCh <- r })
|
||||
|
@ -96,7 +110,6 @@ func (rs *SigningServer) do(ctx context.Context, req *protos.SigningRequest) (*p
|
|||
// Send the request.
|
||||
select {
|
||||
case rs.requests <- req:
|
||||
log.Printf("SigningServer: sent signing request with message=%q", req.GetMessageId())
|
||||
case <-ctx.Done():
|
||||
rs.deregister(req.GetMessageId())
|
||||
return nil, ctx.Err()
|
||||
|
|
|
@ -814,10 +814,13 @@ func (b *BuildReleaseTasks) signArtifacts(ctx *wf.TaskContext, bt sign.BuildType
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
outURLs, jobError := task.AwaitCondition(ctx, 30*time.Second, func() (out []string, done bool, _ error) {
|
||||
status, desc, out, err := b.SignService.ArtifactSigningStatus(ctx, jobID)
|
||||
outURLs, jobError := task.AwaitCondition(ctx, time.Minute, func() (out []string, done bool, _ error) {
|
||||
statusContext, cancel := context.WithTimeout(ctx, time.Minute)
|
||||
defer cancel()
|
||||
t := time.Now()
|
||||
status, desc, out, err := b.SignService.ArtifactSigningStatus(statusContext, jobID)
|
||||
if err != nil {
|
||||
ctx.Printf("ArtifactSigningStatus ran into a retryable communication error: %v\n", err)
|
||||
ctx.Printf("ArtifactSigningStatus ran into a retryable communication error after %v: %v\n", time.Since(t), err)
|
||||
return nil, false, nil
|
||||
}
|
||||
switch status {
|
||||
|
@ -838,11 +841,12 @@ func (b *BuildReleaseTasks) signArtifacts(ctx *wf.TaskContext, bt sign.BuildType
|
|||
if jobError != nil {
|
||||
// If ctx is canceled, also cancel the signing request.
|
||||
if ctx.Err() != nil {
|
||||
cancelContext, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
cancelContext, cancel := context.WithTimeout(context.Background(), time.Minute)
|
||||
defer cancel()
|
||||
t := time.Now()
|
||||
err := b.SignService.CancelSigning(cancelContext, jobID)
|
||||
if err != nil {
|
||||
ctx.Printf("CancelSigning error: %v", err)
|
||||
ctx.Printf("CancelSigning error after %v: %v\n", time.Since(t), err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче