Merge pull request #283 from hongchaodeng/t
e2e: add disaster recovery test for all pods down
Commit 752751ef4e
@@ -211,8 +211,9 @@ func (c *Cluster) disasterRecovery(left etcdutil.MemberSet) error {
        return errNoBackupExist
    }
    backupNow := false
-   if len(left) != 0 {
-       backupNow = requestBackupNow(c.kclient.RESTClient.Client, k8sutil.MakeBackupHostPort(c.name))
+   if len(left) > 0 {
+       log.Infof("cluster (%v) has some pods still running (%v). Will try to make a latest backup from one of them.", c.name, left)
+       backupNow = RequestBackupNow(c.kclient.RESTClient.Client, k8sutil.MakeBackupHostPort(c.name))
    }
    if backupNow {
        log.Info("Made a latest backup successfully")
@@ -238,7 +239,7 @@ func (c *Cluster) disasterRecovery(left etcdutil.MemberSet) error {
    return c.restoreSeedMember()
}

-func requestBackupNow(httpClient *http.Client, addr string) bool {
+func RequestBackupNow(httpClient *http.Client, addr string) bool {
    resp, err := httpClient.Get(fmt.Sprintf("http://%s/backupnow", addr))
    if err != nil {
        log.Errorf("backupnow (%s) request failed: %v", addr, err)
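The hunk above ends mid-function, so the remainder of RequestBackupNow is not visible in this diff. Purely as a sketch of how such a helper plausibly finishes, assuming the backup sidecar answers GET /backupnow with 200 OK once a snapshot is written (the function name and logger wiring below are illustrative, not taken from this PR):

package cluster

import (
    "fmt"
    "net/http"

    "github.com/Sirupsen/logrus"
)

var log = logrus.WithField("pkg", "cluster")

// requestBackupNowSketch is a hypothetical stand-in for the real RequestBackupNow:
// it treats a transport error or any non-200 response as a failed backup request.
func requestBackupNowSketch(httpClient *http.Client, addr string) bool {
    resp, err := httpClient.Get(fmt.Sprintf("http://%s/backupnow", addr))
    if err != nil {
        log.Errorf("backupnow (%s) request failed: %v", addr, err)
        return false
    }
    defer resp.Body.Close()
    if resp.StatusCode != http.StatusOK {
        log.Errorf("backupnow (%s) returned unexpected status: %s", addr, resp.Status)
        return false
    }
    return true
}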
@@ -104,7 +104,7 @@ func CreateBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName,
        "app": BackupPodSelectorAppField,
        "etcd_cluster": clusterName,
    }
-   name := makeBackupName(clusterName)
+   name := MakeBackupName(clusterName)
    _, err = kubecli.ReplicaSets(ns).Create(&extensions.ReplicaSet{
        ObjectMeta: api.ObjectMeta{
            Name: name,
@@ -176,7 +176,7 @@ func CreateBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName,
}

func DeleteBackupReplicaSetAndService(kubecli *unversioned.Client, clusterName, ns string, cleanup bool) error {
-   name := makeBackupName(clusterName)
+   name := MakeBackupName(clusterName)
    err := kubecli.Services(ns).Delete(name)
    if err != nil {
        return err
@@ -109,7 +109,7 @@ func GetNodePortString(srv *api.Service) string {
}

func MakeBackupHostPort(clusterName string) string {
-   return fmt.Sprintf("%s:%d", makeBackupName(clusterName), constants.DefaultBackupPodHTTPPort)
+   return fmt.Sprintf("%s:%d", MakeBackupName(clusterName), constants.DefaultBackupPodHTTPPort)
}

func PodWithAddMemberInitContainer(p *api.Pod, name string, peerURLs []string, cs *spec.ClusterSpec) *api.Pod {
@@ -137,7 +137,7 @@ func PodWithNodeSelector(p *api.Pod, ns map[string]string) *api.Pod {
    return p
}

-func makeBackupName(clusterName string) string {
+func MakeBackupName(clusterName string) string {
    return fmt.Sprintf("%s-backup-tool", clusterName)
}

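The newly exported MakeBackupName (together with MakeBackupHostPort above) is how the e2e test locates the backup sidecar's service. A small, self-contained illustration of the naming scheme it encodes; the concrete value of constants.DefaultBackupPodHTTPPort is not shown anywhere in this diff:

package k8sutil_test

import (
    "fmt"

    "github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
)

// ExampleMakeBackupName shows the backup service/pod naming convention.
// MakeBackupHostPort additionally appends ":<DefaultBackupPodHTTPPort>".
func ExampleMakeBackupName() {
    fmt.Println(k8sutil.MakeBackupName("test-etcd"))
    // Output: test-etcd-backup-tool
}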
@@ -23,7 +23,9 @@ import (
    "testing"
    "time"

+   "github.com/coreos/kube-etcd-controller/pkg/cluster"
    "github.com/coreos/kube-etcd-controller/pkg/spec"
+   "github.com/coreos/kube-etcd-controller/pkg/util/constants"
    "github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
    "github.com/coreos/kube-etcd-controller/test/e2e/framework"
    "k8s.io/kubernetes/pkg/api"
@@ -136,10 +138,22 @@ func TestOneMemberRecovery(t *testing.T) {
        }
}

-func TestDisasterRecovery(t *testing.T) {
+// TestDisasterRecovery2Members tests disaster recovery that
+// controller will make a backup from the left one pod.
+func TestDisasterRecovery2Members(t *testing.T) {
+   testDisasterRecovery(t, 2)
+}
+
+// TestDisasterRecoveryAll tests disaster recovery that
+// we should make a backup ahead and controller will recover cluster from it.
+func TestDisasterRecoveryAll(t *testing.T) {
+   testDisasterRecovery(t, 3)
+}
+
+func testDisasterRecovery(t *testing.T, numToKill int) {
    f := framework.Global
    backupPolicy := &spec.BackupPolicy{
-       SnapshotIntervalInSecond: 120,
+       SnapshotIntervalInSecond: 60 * 60,
        MaxSnapshot: 5,
        VolumeSizeInMB: 512,
        StorageType: spec.BackupStorageTypePersistentVolume,
@@ -161,15 +175,25 @@ func TestDisasterRecovery(t *testing.T) {
    names, err := waitUntilSizeReached(f, testEtcd.Name, 3, 60)
    if err != nil {
        t.Fatalf("failed to create 3 members etcd cluster: %v", err)
        return
    }
    fmt.Println("reached to 3 members cluster")
    if err := waitBackupPodUp(f, testEtcd.Name, 60*time.Second); err != nil {
        t.Fatalf("failed to create backup pod: %v", err)
    }
+   // No left pod to make a backup from. We need to back up ahead.
+   // If there is any left pod, controller should be able to make a backup from it.
+   if numToKill == len(names) {
+       if err := makeBackup(f, testEtcd.Name); err != nil {
+           t.Fatalf("fail to make a latest backup: %v", err)
+       }
+   }
+   toKill := make([]string, numToKill)
+   for i := 0; i < numToKill; i++ {
+       toKill[i] = names[i]
+   }
    // TODO: There might be race that controller will recover members between
    // these members are deleted individually.
-   if err := killMembers(f, names[0], names[1]); err != nil {
+   if err := killMembers(f, toKill...); err != nil {
        t.Fatal(err)
    }
    if _, err := waitUntilSizeReached(f, testEtcd.Name, 3, 120); err != nil {
@@ -192,6 +216,20 @@ func waitBackupPodUp(f *framework.Framework, clusterName string, timeout time.Du
    })
}

+func makeBackup(f *framework.Framework, clusterName string) error {
+   svc, err := f.KubeClient.Services(f.Namespace.Name).Get(k8sutil.MakeBackupName(clusterName))
+   if err != nil {
+       return err
+   }
+   // In our test environment, we assume kube-proxy should be running on the same node.
+   // Thus we can use the service IP.
+   ok := cluster.RequestBackupNow(f.KubeClient.Client, fmt.Sprintf("%s:%d", svc.Spec.ClusterIP, constants.DefaultBackupPodHTTPPort))
+   if !ok {
+       return fmt.Errorf("fail to request backupnow")
+   }
+   return nil
+}
+
func waitUntilSizeReached(f *framework.Framework, clusterName string, size, timeout int) ([]string, error) {
    return waitSizeReachedWithFilter(f, clusterName, size, timeout, func(*api.Pod) bool { return true })
}
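The new makeBackup helper relies on the service ClusterIP being reachable from the test process (the kube-proxy assumption in its comment). Purely as an illustration of the alternative hinted at by the GetNodePortString helper earlier in this diff, a hypothetical out-of-cluster variant might look like the sketch below; makeBackupViaNodePort and nodeIP are made-up names, and it assumes the backup service is actually exposed through a node port:

package e2e

import (
    "fmt"

    "github.com/coreos/kube-etcd-controller/pkg/cluster"
    "github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
    "github.com/coreos/kube-etcd-controller/test/e2e/framework"
)

// makeBackupViaNodePort is a hypothetical sketch, not part of this PR: it asks the
// backup sidecar for an on-demand snapshot via <node IP>:<node port> rather than
// the service's ClusterIP.
func makeBackupViaNodePort(f *framework.Framework, clusterName, nodeIP string) error {
    svc, err := f.KubeClient.Services(f.Namespace.Name).Get(k8sutil.MakeBackupName(clusterName))
    if err != nil {
        return err
    }
    addr := fmt.Sprintf("%s:%s", nodeIP, k8sutil.GetNodePortString(svc))
    if ok := cluster.RequestBackupNow(f.KubeClient.Client, addr); !ok {
        return fmt.Errorf("fail to request backupnow via node port")
    }
    return nil
}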