all: adjust things for upgrade from GKE 1.2 to GKE 1.4

We hit GKE bugs and changes when upgrading from GKE 1.2 to 1.4.

The main issue is that Kubernetes doesn't reserve CPU or memory for
itself on nodes, so things were OOMing and getting killed. And when
Docker or Kubernetes got killed themselves, they were wedging and not
recovering.

So we're going to run a daemonset (a pod on all nodes) to reserve
space for Kubernetes itself. That's not in this CL.

But this CL got us limping along and was already in production. It
doubles resource RAM usage for jobs, so fewer things schedule per node.
While we're at it, let jobs use more CPU if it's available.

Also, disable auto-scaling. It was off before by hand. Force it off
programmatically too. And make the node count 5, like it was by hand.

Also, force un-graceful pod deletes, since GKE 1.3 or something
introduced a graceful-vs-ungraceful distinction, which we weren't
handling previously, and therefore pods were never being deleted.

Change-Id: I3606e4e2e92c496d8194503d510921bd1614d34e
Reviewed-on: https://go-review.googlesource.com/33490
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Brad Fitzpatrick 2016-11-23 05:58:31 +00:00
Родитель 3689a13303
Коммит 71265acedb
4 изменённых файлов: 13 добавлений и 7 удалений

Просмотреть файл

@ -182,8 +182,8 @@ var Production = &Environment{
ZonesToClean: []string{"us-central1-f"},
StaticIP: "107.178.219.46",
MachineType: "n1-standard-4",
KubeMinNodes: 3,
KubeMaxNodes: 5,
KubeMinNodes: 5,
KubeMaxNodes: 5, // auto-scaling disabled
KubeName: "buildlets",
KubeMachineType: "n1-standard-32",
DashURL: "https://build.golang.org/",

Просмотреть файл

@ -23,8 +23,9 @@ import (
var (
// TODO(evanbrown): resource requirements should be
// defined per-builder in dashboard/builders.go
BuildletCPU = api.MustParse("2") // 2 Cores
BuildletMemory = api.MustParse("2000000Ki") // 2,000,000Ki RAM
BuildletCPU = api.MustParse("2") // 2 Cores
BuildletCPULimit = api.MustParse("8") // 8 Cores
BuildletMemory = api.MustParse("4000000Ki") // 4,000,000Ki RAM
)
// PodOpts control how new pods are started.
@ -99,7 +100,7 @@ func StartPod(ctx context.Context, kubeClient *kubernetes.Client, podName, hostT
api.ResourceMemory: BuildletMemory,
},
Limits: api.ResourceList{
api.ResourceCPU: BuildletCPU,
api.ResourceCPU: BuildletCPULimit,
api.ResourceMemory: BuildletMemory,
},
},

Просмотреть файл

@ -96,7 +96,11 @@ resources:
- "https://www.googleapis.com/auth/cloud-platform"
master_auth:
username: "admin"
password: "{{ .KubePassword }}"
password: "{{ .KubePassword }}"`
// Old autoscaler part:
/*
`
- name: autoscaler
type: compute.v1.autoscaler
properties:
@ -109,6 +113,7 @@ resources:
coolDownPeriodSec: 1200
cpuUtilization:
utilizationTarget: .6`
*/
func readFile(v string) string {
slurp, err := ioutil.ReadFile(v)

Просмотреть файл

@ -147,7 +147,7 @@ func (c *Client) GetPods(ctx context.Context) ([]api.Pod, error) {
// PodDelete deletes the specified Kubernetes pod.
func (c *Client) DeletePod(ctx context.Context, podName string) error {
url := c.endpointURL + defaultPod + "/" + podName
req, err := http.NewRequest("DELETE", url, nil)
req, err := http.NewRequest("DELETE", url, strings.NewReader(`{"gracePeriodSeconds":0}`))
if err != nil {
return fmt.Errorf("failed to create request: DELETE %q : %v", url, err)
}