cmd/locktrigger: add fix for Cloud Build race

When a project uses “continuous deployment powered by Cloud Build”,
the deployment is a little bit too continuous: when multiple commits
land in a short time window, Cloud Build will run all the triggered
build jobs in parallel. If each job does “gcloud app deploy”, there
is no guarantee which will win: perhaps an older commit will complete
last, resulting in the newest commit not actually being the final
deployed version of the site. This should probably be fixed in
“continuous deployment powered by Cloud Build”, but until then,
locktrigger works around the problem.

Use locktrigger in cmd/golangorg/cloudbuild.yaml to ensure that
when multiple commits race, the newest one always runs its
go-app-deploy.sh last.

Change-Id: I5ca340250d0a3b7853fc478d35caffdd0163bb0f
Reviewed-on: https://go-review.googlesource.com/c/website/+/368365
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
This commit is contained in:
Russ Cox 2021-12-01 22:43:28 -05:00
Parent 577a9fdbc0
Commit c386b489b9
6 changed files with 263 additions and 40 deletions

View file

@ -4,30 +4,23 @@
steps:
- name: gcr.io/cloud-builders/git
args: [
"clone", "--branch=${_GO_REF}", "--depth=1",
"https://go.googlesource.com/go", "_gotmp",
]
args: ["clone", "--branch=${_GO_REF}", "--depth=1", "https://go.googlesource.com/go", "_gotmp"]
- name: gcr.io/cloud-builders/git
args: ["archive", "--format=zip", "--output=../_goroot.zip", "HEAD"]
dir: _gotmp
args: [
"archive", "--format=zip", "--output=../_goroot.zip", "HEAD",
]
- name: golang
args: ["rm", "-rf", "_gotmp"]
- name: golang
args: ["go", "test", "./..."]
- name: golang
entrypoint: bash
args: ["-c", "go run ./cmd/events/ > ./_content/events.yaml"]
args: ["bash", "-c", "go run ./cmd/events > ./_content/events.yaml"]
- name: golang
args: ["go", "run", "./cmd/locktrigger", "--project=$PROJECT_ID", "--build=$BUILD_ID"]
- name: gcr.io/cloud-builders/gcloud
entrypoint: bash
args: ["./go-app-deploy.sh", "cmd/golangorg/app.yaml"]
- name: golang
args: [
"go", "run", "./cmd/versionprune", "--dry_run=false",
"--project=$PROJECT_ID", "--service=default",
]
args: ["go", "run", "./cmd/versionprune", "--dry_run=false", "--project=$PROJECT_ID", "--service=default"]
options:
machineType: N1_HIGHCPU_8

225
cmd/locktrigger/main.go Normal file
View file

@ -0,0 +1,225 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Locktrigger “locks” a given build trigger, making sure that
// the currently running build is the only trigger running.
//
// Usage:
//
// locktrigger -project=$PROJECT_ID -build=$BUILD_ID
//
// The $PROJECT_ID and $BUILD_ID are typically written literally in cloudbuild.yaml
// and then substituted by Cloud Build.
//
// When a project uses “continuous deployment powered by Cloud Build”,
// the deployment is a little bit too continuous: when multiple commits
// land in a short time window, Cloud Build will run all the triggered
// build jobs in parallel. If each job does “gcloud app deploy”, there
// is no guarantee which will win: perhaps an older commit will complete
// last, resulting in the newest commit not actually being the final
// deployed version of the site. This should probably be fixed in
// “continuous deployment powered by Cloud Build”, but until then,
// locktrigger works around the problem.
//
// All triggered builds must run locktrigger to guarantee mutual exclusion.
// When there is contention—that is, when multiple builds are running and
// they all run locktrigger—the build corresponding to the newest commit
// is permitted to continue running, and older builds are canceled.
//
// When locktrigger exits successfully, then, at that moment, the current
// build is (or recently was) the only running build for its trigger.
// Of course, another build may start immediately after locktrigger exits.
// As long as that build also runs locktrigger, then either it will cancel
// itself (if it is older than we are), or it will cancel us before proceeding
// (if we are older than it is).
package main
import (
"bytes"
"context"
"flag"
"fmt"
"log"
"os"
"os/exec"
"strings"
"time"
cloudbuild "cloud.google.com/go/cloudbuild/apiv1/v2"
"google.golang.org/api/iterator"
cloudbuildpb "google.golang.org/genproto/googleapis/devtools/cloudbuild/v1"
)
// Command-line flags. Both are required; in cloudbuild.yaml they are
// written as $PROJECT_ID and $BUILD_ID, which Cloud Build substitutes
// before invoking locktrigger.
var (
	project = flag.String("project", "", "GCP project `name` (required)")
	build   = flag.String("build", "", "GCP build `id` (required)")
)
// usage prints a one-line usage synopsis to standard error and exits
// with status 2, the conventional exit code for command-line misuse.
func usage() {
	fmt.Fprintf(os.Stderr, "usage: locktrigger -project=name -build=id\n")
	os.Exit(2)
}
// main enforces mutual exclusion among builds of the same trigger:
// it lists the currently running builds, and for every other running
// build of our trigger it compares commit ancestry with git merge-base,
// canceling whichever build corresponds to the older commit (possibly
// ourselves). On successful return, this build is (or recently was)
// the only running build for its trigger.
func main() {
	flag.Usage = usage
	flag.Parse()
	log.SetPrefix("locktrigger: ")
	log.SetFlags(0)
	if *project == "" || *build == "" {
		usage()
	}

	ctx := context.Background()
	c, err := cloudbuild.NewClient(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer c.Close()

	// Find commit hash of the local Git checkout.
	myHash := run("git", "rev-parse", "HEAD")
	log.Printf("my hash: %v", myHash)

	// Find build object for current build, check that it matches
	// the local checkout — a mismatch would mean we are reasoning
	// about the wrong build.
	self := getBuild(c, ctx, *build)
	if hash := self.Substitutions["COMMIT_SHA"]; hash != myHash {
		log.Fatalf("build COMMIT_SHA does not match local hash: %v != %v", hash, myHash)
	}
	log.Printf("my build: %v", self.Id)
	if self.BuildTriggerId == "" {
		log.Fatalf("build has no trigger ID")
	}
	log.Printf("my trigger: %v", self.BuildTriggerId)

	// List all builds for our trigger that are still running.
	req := &cloudbuildpb.ListBuildsRequest{
		ProjectId: *project,
		// Note: Really want "status=WORKING buildTriggerId="+self.BuildTriggerId,
		// but that fails with an InvalidArgument error for unknown reasons.
		// status=WORKING will narrow the list down to something reasonable,
		// and we filter the unrelated triggers below.
		Filter: "status=WORKING",
	}
	it := c.ListBuilds(ctx, req)

	foundSelf := false
	// A shallow clone cannot answer ancestry questions; the presence of
	// the .git/shallow file marks the clone as shallow, and we defer the
	// expensive unshallow fetch until we actually hit contention.
	shallow := false
	if _, err := os.Stat(run("git", "rev-parse", "--git-dir") + "/shallow"); err == nil {
		shallow = true
	}
	for {
		b, err := it.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			log.Fatalf("reading builds: %v (%q)", err, req.Filter)
		}
		// Skip builds from unrelated triggers (see Filter note above).
		if b.BuildTriggerId != self.BuildTriggerId {
			continue
		}
		// Check whether this build is an older or newer commit.
		// If this build is older, cancel it.
		// If this build is newer, cancel ourselves.
		if b.Id == self.Id {
			foundSelf = true
			continue
		}
		hash := b.Substitutions["COMMIT_SHA"]
		if hash == "" {
			log.Fatalf("cannot find COMMIT_SHA for build %v", b.Id)
		}
		if hash == myHash {
			// Two builds for the same commit should never happen;
			// there is no meaningful older/newer ordering between them.
			log.Fatalf("found another build %v at same commit %v", b.Id, hash)
		}

		// Fetch the full Git repo so we can answer the history questions.
		// This is delayed until now to avoid the expense of fetching the full repo
		// if we are the only build that is running.
		if shallow {
			log.Printf("git fetch --unshallow")
			run("git", "fetch", "--unshallow")
			shallow = false
		}

		// Contention.
		// Find the common ancestor between us and that build,
		// to tell whether we're older, it's older, or we're unrelated.
		// merge-base printing one of the two hashes means that hash
		// is an ancestor of (older than) the other.
		log.Printf("checking %v", hash)
		switch run("git", "merge-base", myHash, hash) {
		default:
			log.Fatalf("unexpected build for unrelated commit %v", hash)
		case myHash:
			// myHash is older than b's hash. Cancel self.
			log.Printf("canceling self, for build %v commit %v", b.Id, hash)
			cancel(c, ctx, self.Id)
		case hash:
			// b's hash is older than myHash. Cancel b.
			log.Printf("canceling build %v commit %v", b.Id, hash)
			cancel(c, ctx, b.Id)
		}
	}

	// If we listed all the in-progress builds, we should have seen ourselves.
	// Not finding ourselves suggests the listing was incomplete or stale,
	// in which case we cannot trust the mutual-exclusion guarantee.
	if !foundSelf {
		log.Fatalf("reading builds: didn't find self")
	}
}
// getBuild fetches the Cloud Build record for the build with the given id
// in the configured project, exiting the program if the lookup fails.
func getBuild(c *cloudbuild.Client, ctx context.Context, id string) *cloudbuildpb.Build {
	b, err := c.GetBuild(ctx, &cloudbuildpb.GetBuildRequest{
		ProjectId: *project,
		Id:        id,
	})
	if err != nil {
		log.Fatalf("getbuild %v: %v", id, err)
	}
	return b
}
// cancel asks Cloud Build to cancel the build with the given id, then
// polls until that build reports a non-WORKING status, in case the
// cancellation is only queued (or the cancel call itself failed because
// the build had already finished). Gives up after a few minutes.
func cancel(c *cloudbuild.Client, ctx context.Context, id string) {
	if _, err := c.CancelBuild(ctx, &cloudbuildpb.CancelBuildRequest{
		ProjectId: *project,
		Id:        id,
	}); err != nil {
		// Not fatal: the cancel may have failed because the build exited.
		// The status poll below takes care of that case.
		log.Printf("cancel %v: %v", id, err)
	}
	// Poll for up to three minutes waiting for the build to stop running.
	deadline := time.Now().Add(3 * time.Minute)
	for time.Now().Before(deadline) {
		if b := getBuild(c, ctx, id); b.Status != cloudbuildpb.Build_WORKING {
			log.Printf("canceled %v: now %v", id, b.Status)
			return
		}
		time.Sleep(10 * time.Second)
	}
	log.Fatalf("cancel %v: did not stop", id)
}
// run executes the given command line and returns its standard output
// with surrounding whitespace trimmed. On failure it logs the command,
// the error, and everything the command wrote, then exits the program.
func run(args ...string) string {
	var out, errOut bytes.Buffer
	cmd := exec.Command(args[0], args[1:]...)
	cmd.Stdout = &out
	cmd.Stderr = &errOut
	err := cmd.Run()
	if err != nil {
		log.Fatalf("exec %v: %v\n%s%s", args, err, out.String(), errOut.String())
	}
	return strings.TrimSpace(out.String())
}

View file

@ -1,5 +1,9 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Binary versionprune prunes stale AppEngine versions for a specified service.
Versionprune prunes stale AppEngine versions for a specified service.
The command by default will:
- keep the latest 5 versions

View file

@ -1,9 +1,14 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"context"
"flag"
"fmt"
"log"
"os"
"sort"
"time"
@ -12,35 +17,33 @@ import (
)
var (
dryRun = flag.Bool("dry_run", true, "When true, just print intended modifications and quit")
keepDuration = flag.Duration("keep_duration", 24*time.Hour, "Versions older than this will be deleted")
keepNumber = flag.Int("keep_number", 5, "Minimum number of versions to keep")
project = flag.String("project", "", "GCP Project (required)")
service = flag.String("service", "", "AppEngine service (required)")
dryRun = flag.Bool("dry_run", true, "print but do not run changes")
keepDuration = flag.Duration("keep_duration", 24*time.Hour, "keep versions with age < `t`")
keepNumber = flag.Int("keep_number", 5, "keep at least `n` versions")
project = flag.String("project", "", "GCP project `name` (required)")
service = flag.String("service", "", "AppEngine service `name` (required)")
)
func main() {
flag.Parse()
func usage() {
fmt.Fprintf(os.Stderr, "usage: versionprune -project=name -service=name [options]\n")
flag.PrintDefaults()
os.Exit(2)
}
if *project == "" {
fmt.Println("-project flag is required.")
flag.Usage()
os.Exit(1)
}
if *service == "" {
fmt.Println("-service flag is required.")
flag.Usage()
os.Exit(1)
func main() {
flag.Usage = usage
flag.Parse()
log.SetPrefix("versionprune: ")
log.SetFlags(0)
if *project == "" || *service == "" {
usage()
}
if *keepDuration < 0 {
fmt.Printf("-keep_duration must be greater or equal to 0, got %s\n", *keepDuration)
flag.Usage()
os.Exit(1)
log.Fatalf("-keep_duration=%v must be >= 0", *keepDuration)
}
if *keepNumber < 0 {
fmt.Printf("-keep_number must be greater or equal to 0, got %d\n", *keepNumber)
flag.Usage()
os.Exit(1)
log.Fatalf("-keep_number=%d must be >= 0", *keepNumber)
}
if err := run(context.Background()); err != nil {

3
go.mod
View file

@ -3,6 +3,7 @@ module golang.org/x/website
go 1.16
require (
cloud.google.com/go v0.88.0
cloud.google.com/go/datastore v1.2.0
github.com/gomodule/redigo v2.0.0+incompatible
github.com/google/go-cmp v0.5.6
@ -11,8 +12,8 @@ require (
golang.org/x/build v0.0.0-20211102155042-c046fca86e58
golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985
golang.org/x/tools v0.1.5
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0
google.golang.org/api v0.51.0
google.golang.org/genproto v0.0.0-20210726143408-b02e89920bf0
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
)

3
go.sum
View file

@ -993,13 +993,10 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.3-0.20210525215409-a3eb095d6aee/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA=
golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0 h1:a8Cl2fISREZQwBT5izVICCIC51QrZXfV087EaJMK7ZY=
golang.org/x/tour v0.0.0-20210526031051-3891a3eb15c0/go.mod h1:7RMQeqT5ScoysCgwPp55tOo09RuvuVD10CBiMXGyVzQ=
golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=