Merge pull request #283 from hongchaodeng/t

e2e: add disaster recovery test for all pods down
Xiang Li 2016-10-28 17:49:12 -07:00 committed by GitHub
Parents 07ddbec5cb ad86da1639
Commit 752751ef4e
4 changed files with 50 additions and 11 deletions
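Condensed from the test changes below, the all-pods-down recovery path looks roughly like the sketch that follows. This is an illustrative outline only, not the committed test body: it assumes it sits inside the e2e test package, reuses the helpers touched or added in this PR (waitUntilSizeReached, waitBackupPodUp, makeBackup, killMembers), and omits cluster setup and detailed error messages.

// Sketch: outline of the TestDisasterRecoveryAll flow (numToKill == cluster size).
// The function name is hypothetical; f and clusterName would come from the
// e2e framework and the test setup shown in the diff below.
func disasterRecoveryAllOutline(t *testing.T, f *framework.Framework, clusterName string) {
	names, err := waitUntilSizeReached(f, clusterName, 3, 60)
	if err != nil {
		t.Fatal(err)
	}
	if err := waitBackupPodUp(f, clusterName, 60*time.Second); err != nil {
		t.Fatal(err)
	}
	// Every member will be killed, so no surviving pod can serve a backup;
	// ask the backup sidecar for one ahead of time.
	if err := makeBackup(f, clusterName); err != nil {
		t.Fatal(err)
	}
	if err := killMembers(f, names...); err != nil {
		t.Fatal(err)
	}
	// The controller should seed a new member from the backup and grow the cluster back to 3.
	if _, err := waitUntilSizeReached(f, clusterName, 3, 120); err != nil {
		t.Fatal(err)
	}
}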

View file

@@ -211,8 +211,9 @@ func (c *Cluster) disasterRecovery(left etcdutil.MemberSet) error {
 		return errNoBackupExist
 	}
 	backupNow := false
-	if len(left) != 0 {
-		backupNow = requestBackupNow(c.kclient.RESTClient.Client, k8sutil.MakeBackupHostPort(c.name))
+	if len(left) > 0 {
+		log.Infof("cluster (%v) has some pods still running (%v). Will try to make a latest backup from one of them.", c.name, left)
+		backupNow = RequestBackupNow(c.kclient.RESTClient.Client, k8sutil.MakeBackupHostPort(c.name))
 	}
 	if backupNow {
 		log.Info("Made a latest backup successfully")
@@ -238,7 +239,7 @@ func (c *Cluster) disasterRecovery(left etcdutil.MemberSet) error {
 	return c.restoreSeedMember()
 }
 
-func requestBackupNow(httpClient *http.Client, addr string) bool {
+func RequestBackupNow(httpClient *http.Client, addr string) bool {
 	resp, err := httpClient.Get(fmt.Sprintf("http://%s/backupnow", addr))
 	if err != nil {
 		log.Errorf("backupnow (%s) request failed: %v", addr, err)
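For context: renaming requestBackupNow to RequestBackupNow exports the helper, so code outside the cluster package (the e2e test further down) can trigger an on-demand backup. A minimal sketch of such a call, assuming it runs somewhere that can resolve the backup service name; the cluster name below is a placeholder, not taken from this PR:

package main

import (
	"log"
	"net/http"

	"github.com/coreos/kube-etcd-controller/pkg/cluster"
	"github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
)

func main() {
	// "example-etcd" is a made-up cluster name; MakeBackupHostPort yields
	// "<cluster>-backup-tool:<DefaultBackupPodHTTPPort>".
	addr := k8sutil.MakeBackupHostPort("example-etcd")
	if !cluster.RequestBackupNow(http.DefaultClient, addr) {
		log.Fatalf("backupnow request against %s failed", addr)
	}
}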

View file

@@ -104,7 +104,7 @@ func CreateBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName,
 		"app": BackupPodSelectorAppField,
 		"etcd_cluster": clusterName,
 	}
-	name := makeBackupName(clusterName)
+	name := MakeBackupName(clusterName)
 	_, err = kubecli.ReplicaSets(ns).Create(&extensions.ReplicaSet{
 		ObjectMeta: api.ObjectMeta{
 			Name: name,
@@ -176,7 +176,7 @@ func CreateBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName,
 }
 
 func DeleteBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName, ns string, cleanup bool) error {
-	name := makeBackupName(clusterName)
+	name := MakeBackupName(clusterName)
 	err := kubecli.Services(ns).Delete(name)
 	if err != nil {
 		return err

View file

@@ -109,7 +109,7 @@ func GetNodePortString(srv *api.Service) string {
 }
 
 func MakeBackupHostPort(clusterName string) string {
-	return fmt.Sprintf("%s:%d", makeBackupName(clusterName), constants.DefaultBackupPodHTTPPort)
+	return fmt.Sprintf("%s:%d", MakeBackupName(clusterName), constants.DefaultBackupPodHTTPPort)
 }
 
 func PodWithAddMemberInitContainer(p *api.Pod, name string, peerURLs []string, cs *spec.ClusterSpec) *api.Pod {
@@ -137,7 +137,7 @@ func PodWithNodeSelector(p *api.Pod, ns map[string]string) *api.Pod {
 	return p
 }
 
-func makeBackupName(clusterName string) string {
+func MakeBackupName(clusterName string) string {
 	return fmt.Sprintf("%s-backup-tool", clusterName)
 }
 
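Exporting makeBackupName as MakeBackupName lets the e2e test look up the backup service by name. Both helpers only do string formatting; an illustrative use with a placeholder cluster name (the port value comes from constants.DefaultBackupPodHTTPPort, which is not shown in this diff):

package main

import (
	"fmt"

	"github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
)

func main() {
	fmt.Println(k8sutil.MakeBackupName("mycluster"))     // mycluster-backup-tool
	fmt.Println(k8sutil.MakeBackupHostPort("mycluster")) // mycluster-backup-tool:<DefaultBackupPodHTTPPort>
}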

View file

@@ -23,7 +23,9 @@ import (
 	"testing"
 	"time"
 
+	"github.com/coreos/kube-etcd-controller/pkg/cluster"
 	"github.com/coreos/kube-etcd-controller/pkg/spec"
+	"github.com/coreos/kube-etcd-controller/pkg/util/constants"
 	"github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
 	"github.com/coreos/kube-etcd-controller/test/e2e/framework"
 	"k8s.io/kubernetes/pkg/api"
@@ -136,10 +138,22 @@ func TestOneMemberRecovery(t *testing.T) {
 	}
 }
 
-func TestDisasterRecovery(t *testing.T) {
+// TestDisasterRecovery2Members tests disaster recovery that
+// controller will make a backup from the left one pod.
+func TestDisasterRecovery2Members(t *testing.T) {
+	testDisasterRecovery(t, 2)
+}
+
+// TestDisasterRecoveryAll tests disaster recovery that
+// we should make a backup ahead and controller will recover cluster from it.
+func TestDisasterRecoveryAll(t *testing.T) {
+	testDisasterRecovery(t, 3)
+}
+
+func testDisasterRecovery(t *testing.T, numToKill int) {
 	f := framework.Global
 	backupPolicy := &spec.BackupPolicy{
-		SnapshotIntervalInSecond: 120,
+		SnapshotIntervalInSecond: 60 * 60,
 		MaxSnapshot: 5,
 		VolumeSizeInMB: 512,
 		StorageType: spec.BackupStorageTypePersistentVolume,
@@ -161,15 +175,25 @@ func TestDisasterRecovery(t *testing.T) {
 	names, err := waitUntilSizeReached(f, testEtcd.Name, 3, 60)
 	if err != nil {
 		t.Fatalf("failed to create 3 members etcd cluster: %v", err)
-		return
 	}
 	fmt.Println("reached to 3 members cluster")
 	if err := waitBackupPodUp(f, testEtcd.Name, 60*time.Second); err != nil {
 		t.Fatalf("failed to create backup pod: %v", err)
 	}
+	// No left pod to make a backup from. We need to back up ahead.
+	// If there is any left pod, controller should be able to make a backup from it.
+	if numToKill == len(names) {
+		if err := makeBackup(f, testEtcd.Name); err != nil {
+			t.Fatalf("fail to make a latest backup: %v", err)
+		}
+	}
+	toKill := make([]string, numToKill)
+	for i := 0; i < numToKill; i++ {
+		toKill[i] = names[i]
+	}
 	// TODO: There might be race that controller will recover members between
 	// these members are deleted individually.
-	if err := killMembers(f, names[0], names[1]); err != nil {
+	if err := killMembers(f, toKill...); err != nil {
 		t.Fatal(err)
 	}
 	if _, err := waitUntilSizeReached(f, testEtcd.Name, 3, 120); err != nil {
@@ -192,6 +216,20 @@ func waitBackupPodUp(f *framework.Framework, clusterName string, timeout time.Duration) error {
 	})
 }
 
+func makeBackup(f *framework.Framework, clusterName string) error {
+	svc, err := f.KubeClient.Services(f.Namespace.Name).Get(k8sutil.MakeBackupName(clusterName))
+	if err != nil {
+		return err
+	}
+	// In our test environment, we assume kube-proxy should be running on the same node.
+	// Thus we can use the service IP.
+	ok := cluster.RequestBackupNow(f.KubeClient.Client, fmt.Sprintf("%s:%d", svc.Spec.ClusterIP, constants.DefaultBackupPodHTTPPort))
+	if !ok {
+		return fmt.Errorf("fail to request backupnow")
+	}
+	return nil
+}
+
 func waitUntilSizeReached(f *framework.Framework, clusterName string, size, timeout int) ([]string, error) {
 	return waitSizeReachedWithFilter(f, clusterName, size, timeout, func(*api.Pod) bool { return true })
 }