Mirror of https://github.com/golang/build.git
cmd/coordinator, all: fix more things related to multi-zone buildlets

This fixes stuff in CL 210498 and CL 210237. I renamed the Zone field to
ControlZone both to make it more clear and to force compilation errors
wherever Zone was used previously, which revealed some things that were
missed.

Updates golang/go#35987
Change-Id: I2f890727ece86d093a90a3b47701caa58de6ccbc
Reviewed-on: https://go-review.googlesource.com/c/build/+/210541
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alexander Rakoczy <alex@golang.org>
This commit is contained in:
Parent: c1b987df2a
Commit: 326548a346
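For context, a minimal sketch of what the renamed field and the widened hook look like to a caller after this CL. It is not part of the diff below; the package paths, field names, and method names are taken from the change itself, but the surrounding wiring is illustrative only.

// Minimal sketch, assuming golang.org/x/build at a revision that includes
// this change and google.golang.org/api/compute/v1 on the module path.
package main

import (
	"fmt"
	"log"

	"golang.org/x/build/buildenv"
	"golang.org/x/build/buildlet"
	compute "google.golang.org/api/compute/v1"
)

func main() {
	env := &buildenv.Environment{
		// ControlZone (formerly Zone) is where the coordinator and GKE clusters run.
		ControlZone: "us-central1-f",
		// VMZones is where buildlet VMs may be created.
		VMZones: []string{"us-central1-a", "us-central1-b"},
	}
	// RandomVMZone picks one of VMZones, falling back to ControlZone when
	// VMZones is empty; Region is derived from ControlZone.
	fmt.Println("VM zone:", env.RandomVMZone(), "region:", env.Region())

	// OnGotInstanceInfo now receives the created *compute.Instance, so a
	// caller can learn which zone the VM actually landed in.
	opts := buildlet.VMOpts{
		Zone: env.RandomVMZone(),
		OnGotInstanceInfo: func(inst *compute.Instance) {
			log.Printf("instance is running in %v", inst.Zone)
		},
	}
	_ = opts // in real callers, opts is passed to buildlet.StartNewVM
}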
@@ -65,10 +65,10 @@ type Environment struct {
 	// disabled and the coordinator serves on 8119.
 	IsProd bool
 
-	// Zone is the GCE zone that the coordinator instance and Kubernetes cluster
+	// ControlZone is the GCE zone that the coordinator instance and Kubernetes cluster
 	// will run in. This field may be overridden as necessary without impacting
 	// other fields.
-	Zone string
+	ControlZone string
 
 	// VMZones are the GCE zones that the VMs will be deployed to. These
 	// GCE zones will be periodically cleaned by deleting old VMs. The zones

@@ -137,14 +137,14 @@ func (e Environment) ComputePrefix() string {
 // The Zone value will be returned if VMZones is not set.
 func (e Environment) RandomVMZone() string {
 	if len(e.VMZones) == 0 {
-		return e.Zone
+		return e.ControlZone
 	}
 	return e.VMZones[rand.Intn(len(e.VMZones))]
 }
 
 // Region returns the GCE region, derived from its zone.
 func (e Environment) Region() string {
-	return e.Zone[:strings.LastIndex(e.Zone, "-")]
+	return e.ControlZone[:strings.LastIndex(e.ControlZone, "-")]
 }
 
 // SnapshotURL returns the absolute URL of the .tar.gz containing a

@@ -227,7 +227,7 @@ var Staging = &Environment{
 	ProjectName:   "go-dashboard-dev",
 	ProjectNumber: 302018677728,
 	IsProd:        true,
-	Zone:          "us-central1-f",
+	ControlZone:   "us-central1-f",
 	VMZones:       []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
 	StaticIP:      "104.154.113.235",
 	MachineType:   "n1-standard-1",

@@ -258,7 +258,7 @@ var Production = &Environment{
 	ProjectName:   "symbolic-datum-552",
 	ProjectNumber: 872405196845,
 	IsProd:        true,
-	Zone:          "us-central1-f",
+	ControlZone:   "us-central1-f",
 	VMZones:       []string{"us-central1-a", "us-central1-b", "us-central1-c", "us-central1-f"},
 	StaticIP:      "107.178.219.46",
 	MachineType:   "n1-standard-4",

@@ -10,45 +10,41 @@ import (
 
 func TestEnvironmentNextZone(t *testing.T) {
 	testCases := []struct {
-		name     string
-		env      Environment
-		wantZone []string // desired zone should appear in this slice
+		name      string
+		env       Environment
+		wantOneOf []string // desired zone should appear in this slice
 	}{
 		{
 			name: "zones-not-set",
 			env: Environment{
-				Zone:    "kentucky",
-				VMZones: []string{},
+				ControlZone: "kentucky",
+				VMZones:     []string{},
 			},
-			wantZone: []string{"kentucky"},
+			wantOneOf: []string{"kentucky"},
 		},
 		{
 			name: "zone-and-zones-set",
 			env: Environment{
-				Zone:    "kentucky",
-				VMZones: []string{"texas", "california", "washington"},
+				ControlZone: "kentucky",
+				VMZones:     []string{"texas", "california", "washington"},
 			},
 
-			wantZone: []string{"texas", "california", "washington"},
+			wantOneOf: []string{"texas", "california", "washington"},
 		},
 		{
 			name: "zones-only-contains-one-entry",
 			env: Environment{
-				Zone:    "kentucky",
-				VMZones: []string{"texas"},
+				ControlZone: "kentucky",
+				VMZones:     []string{"texas"},
 			},
-			wantZone: []string{"texas"},
+			wantOneOf: []string{"texas"},
 		},
 	}
 	for _, tc := range testCases {
 		t.Run(tc.name, func(t *testing.T) {
-			e := Environment{
-				Zone:    tc.env.Zone,
-				VMZones: tc.env.VMZones,
-			}
-			got := e.RandomVMZone()
-			if !containsString(got, tc.wantZone) {
-				t.Errorf("got=%q; want %v", got, tc.wantZone)
+			got := tc.env.RandomVMZone()
+			if !containsString(got, tc.wantOneOf) {
+				t.Errorf("got=%q; want %v", got, tc.wantOneOf)
 			}
 		})
 	}

@@ -71,7 +71,7 @@ type VMOpts struct {
 
 	// OnInstanceCreated optionally specifies a hook to run synchronously
 	// after the computeService.Instances.Get call.
-	OnGotInstanceInfo func()
+	OnGotInstanceInfo func(*compute.Instance)
 
 	// OnBeginBuildletProbe optionally specifies a hook to run synchronously
 	// before StartNewVM tries to hit buildletURL to see if it's up yet.

@@ -98,6 +98,7 @@ func StartNewVM(creds *google.Credentials, buildEnv *buildenv.Environment, instN
 	if opts.Zone == "" {
 		opts.Zone = buildEnv.RandomVMZone()
 	}
+	zone := opts.Zone
 	if opts.DeleteIn == 0 {
 		opts.DeleteIn = 30 * time.Minute
 	}

@@ -110,12 +111,6 @@ func StartNewVM(creds *google.Credentials, buildEnv *buildenv.Environment, instN
 		return nil, fmt.Errorf("host %q is type %q; want either a VM or container type", hostType, hconf.PoolName())
 	}
 
-	zone := opts.Zone
-	if zone == "" {
-		// TODO: automatic? maybe that's not useful.
-		// For now just return an error.
-		return nil, errors.New("buildlet: missing required Zone option")
-	}
 	projectID := opts.ProjectID
 	if projectID == "" {
 		return nil, errors.New("buildlet: missing required ProjectID option")

@@ -328,7 +323,9 @@ OpLoop:
 		buildletURL = "http://" + intIP
 		ipPort = intIP + ":80"
 	}
-	condRun(opts.OnGotInstanceInfo)
+	if opts.OnGotInstanceInfo != nil {
+		opts.OnGotInstanceInfo(inst)
+	}
 
 	const timeout = 5 * time.Minute
 	var alive bool

@@ -116,7 +116,7 @@ func initGCE() error {
 
 	// Convert the zone from "projects/1234/zones/us-central1-a" to "us-central1-a".
 	projectZone = path.Base(projectZone)
-	buildEnv.Zone = projectZone
+	buildEnv.ControlZone = projectZone
 
 	if buildEnv.StaticIP == "" {
 		buildEnv.StaticIP, err = metadata.ExternalIP()

@@ -336,7 +336,7 @@ func (p *gceBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg l
 			waitBuildlet = lg.CreateSpan("wait_buildlet_start", instName)
 			curSpan = waitBuildlet
 		},
-		OnGotInstanceInfo: func() {
+		OnGotInstanceInfo: func(*compute.Instance) {
 			lg.LogEventTime("got_instance_info", "waiting_for_buildlet...")
 		},
 		Zone: zone,

@@ -354,12 +354,12 @@ func (p *gceBuildletPool) GetBuildlet(ctx context.Context, hostType string, lg l
 	waitBuildlet.Done(nil)
 	bc.SetDescription("GCE VM: " + instName)
 	bc.SetOnHeartbeatFailure(func() {
-		p.putBuildlet(bc, hostType, instName)
+		p.putBuildlet(bc, hostType, zone, instName)
 	})
 	return bc, nil
 }
 
-func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, hostType, instName string) error {
+func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, hostType, zone, instName string) error {
 	// TODO(bradfitz): add the buildlet to a freelist (of max N
 	// items) for up to 10 minutes since when it got started if
 	// it's never seen a command execution failure, and we can

@@ -369,7 +369,7 @@ func (p *gceBuildletPool) putBuildlet(bc *buildlet.Client, hostType, instName st
 	// buildlet client library between Close, Destroy/Halt, and
 	// tracking execution errors. That was all half-baked before
 	// and thus removed. Now Close always destroys everything.
-	deleteVM(buildEnv.Zone, instName)
+	deleteVM(zone, instName)
 	p.setInstanceUsed(instName, false)
 
 	hconf, ok := dashboard.Hosts[hostType]

@@ -59,7 +59,7 @@ func initKube() error {
 	var err error
 	buildletsKubeClient, err = gke.NewClient(ctx,
 		buildEnv.KubeBuild.Name,
-		gke.OptZone(buildEnv.Zone),
+		gke.OptZone(buildEnv.ControlZone),
 		gke.OptProject(buildEnv.ProjectName),
 		gke.OptTokenSource(gcpCreds.TokenSource))
 	if err != nil {

@@ -68,7 +68,7 @@ func initKube() error {
 
 	goKubeClient, err = gke.NewClient(ctx,
 		buildEnv.KubeTools.Name,
-		gke.OptZone(buildEnv.Zone),
+		gke.OptZone(buildEnv.ControlZone),
 		gke.OptProject(buildEnv.ProjectName),
 		gke.OptTokenSource(gcpCreds.TokenSource))
 	if err != nil {

@@ -437,7 +437,7 @@ func (p *kubeBuildletPool) cleanUpOldPods(ctx context.Context) {
 		}
 		if err == nil && time.Now().Unix() > unixDeadline {
 			stats.DeletedOld++
-			log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, buildEnv.Zone)
+			log.Printf("cleanUpOldPods: Deleting expired pod %q in zone %q ...", pod.Name, buildEnv.ControlZone)
 			err = buildletsKubeClient.DeletePod(ctx, pod.Name)
 			if err != nil {
 				log.Printf("cleanUpOldPods: problem deleting old pod %q: %v", pod.Name, err)

@@ -90,9 +90,6 @@ func main() {
 	}
 
 	env = buildenv.FromFlags()
-	if *zone != "" {
-		env.Zone = *zone
-	}
 
 	ctx := context.Background()
 

@@ -106,15 +103,20 @@ func main() {
 	name := fmt.Sprintf("debug-temp-%d", time.Now().Unix())
 
 	log.Printf("Creating %s (with VM image %s)", name, vmImageSummary)
+	var zoneSelected string
 	bc, err := buildlet.StartNewVM(creds, env, name, *hostType, buildlet.VMOpts{
+		Zone:                *zone,
 		OnInstanceRequested: func() { log.Printf("instance requested") },
 		OnInstanceCreated: func() {
 			log.Printf("instance created")
 			if *serial {
-				go watchSerial(name)
+				go watchSerial(zoneSelected, name)
 			}
 		},
-		OnGotInstanceInfo: func() { log.Printf("got instance info") },
+		OnGotInstanceInfo: func(inst *compute.Instance) {
+			zoneSelected = inst.Zone
+			log.Printf("got instance info; running in %v", zoneSelected)
+		},
 		OnBeginBuildletProbe: func(buildletURL string) {
 			log.Printf("About to hit %s to see if buildlet is up yet...", buildletURL)
 		},

@@ -213,11 +215,11 @@ func main() {
 // gcloud compute connect-to-serial-port --zone=xxx $NAME
 // but in Go and works. For some reason, gcloud doesn't work as a
 // child process and has weird errors.
-func watchSerial(name string) {
+func watchSerial(zone, name string) {
 	start := int64(0)
 	indent := strings.Repeat(" ", len("2017/07/25 06:37:14 SERIAL: "))
 	for {
-		sout, err := computeSvc.Instances.GetSerialPortOutput(env.ProjectName, env.Zone, name).Start(start).Do()
+		sout, err := computeSvc.Instances.GetSerialPortOutput(env.ProjectName, zone, name).Start(start).Do()
 		if err != nil {
 			log.Printf("serial output error: %v", err)
 			return

@@ -37,7 +37,7 @@ resources:
 - name: "{{ .Kube.Name }}"
   type: container.v1.cluster
   properties:
-    zone: "{{ .Env.Zone }}"
+    zone: "{{ .Env.ControlZone }}"
    cluster:
      initial_node_count: {{ .Kube.MinNodes }}
      network: "default"

@@ -56,12 +56,12 @@ func main() {
 	case "kubectl":
 		env := getEnv()
 		curCtx := kubeCurrentContext()
-		wantCtx := fmt.Sprintf("gke_%s_%s_go", env.ProjectName, env.Zone)
+		wantCtx := fmt.Sprintf("gke_%s_%s_go", env.ProjectName, env.ControlZone)
 		if curCtx != wantCtx {
 			log.SetFlags(0)
 			log.Fatalf("Wrong kubectl context; currently using %q; want %q\nRun:\n  gcloud container clusters get-credentials --project=%s --zone=%s go",
 				curCtx, wantCtx,
-				env.ProjectName, env.Zone,
+				env.ProjectName, env.ControlZone,
 			)
 		}
 		// gcloud container clusters get-credentials --zone=us-central1-f go

@@ -9,6 +9,7 @@ import (
 	"errors"
 	"fmt"
 	"log"
+	"path"
 	"sort"
 	"strings"
 	"time"

@@ -88,7 +89,7 @@ func (c *Client) MakeBasepinDisks(ctx context.Context) error {
 			return err
 		}
 		if err := c.AwaitOp(ctx, op); err != nil {
-			log.Fatalf("basepin: failed to create: %v", err)
+			return fmt.Errorf("basepin: failed to create: %v", err)
 		}
 	}
 	log.Printf("basepin: created %d images in %v", len(needed), zone)

@@ -103,10 +104,10 @@ func (c *Client) AwaitOp(ctx context.Context, op *compute.Operation) error {
 	svc := c.Compute()
 	opName := op.Name
 	// TODO: move logging to Client c.logger. and add Client.WithLogger shallow copier.
-	log.Printf("Waiting on operation %v", opName)
+	log.Printf("Waiting on operation %v (in %q)", opName, op.Zone)
 	for {
 		time.Sleep(2 * time.Second)
-		op, err := svc.ZoneOperations.Get(c.Env.ProjectName, c.Env.Zone, opName).Do()
+		op, err := svc.ZoneOperations.Get(c.Env.ProjectName, path.Base(op.Zone), opName).Do()
 		if err != nil {
 			return fmt.Errorf("Failed to get op %s: %v", opName, err)
 		}