retry due to 5 second unhealthy cluster error

This commit is contained in:
Hongchao Deng 2016-09-10 08:35:36 -07:00
Родитель 9dea75fc1e
Коммит 3da3b91c9f
1 изменённых файлов: 18 добавлений и 4 удалений

Просмотреть файл

@ -7,10 +7,12 @@ import (
log "github.com/Sirupsen/logrus"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/kube-etcd-controller/pkg/util/constants"
"github.com/coreos/kube-etcd-controller/pkg/util/etcdutil"
"github.com/coreos/kube-etcd-controller/pkg/util/k8sutil"
"golang.org/x/net/context"
"k8s.io/kubernetes/pkg/util/wait"
)
const waitMemberReadyTimeout = 10 * time.Second
@ -104,12 +106,24 @@ func (c *Cluster) addOneMember() error {
}
newMemberName := fmt.Sprintf("%s-%04d", c.name, c.idCounter)
newMember := &etcdutil.Member{Name: newMemberName}
ctx, _ := context.WithTimeout(context.Background(), constants.DefaultRequestTimeout)
resp, err := etcdcli.MemberAdd(ctx, []string{newMember.PeerAddr()})
var id uint64
// Could have "unhealthy cluster" due to 5 second strict check. Retry.
err = wait.Poll(1*time.Second, 20*time.Second, func() (done bool, err error) {
ctx, _ := context.WithTimeout(context.Background(), constants.DefaultRequestTimeout)
resp, err := etcdcli.MemberAdd(ctx, []string{newMember.PeerAddr()})
if err != nil {
if err == rpctypes.ErrUnhealthy {
return false, nil
}
return false, fmt.Errorf("etcdcli failed to add one member: %v", err)
}
id = resp.Member.ID
return true, nil
})
if err != nil {
return err
}
newMember.ID = resp.Member.ID
newMember.ID = id
c.members.Add(newMember)
if err := c.createPodAndService(c.members, newMember, "existing", false); err != nil {
@ -137,7 +151,7 @@ func (c *Cluster) removeMember(toRemove *etcdutil.Member) error {
clustercli := clientv3.NewCluster(etcdcli)
ctx, _ := context.WithTimeout(context.Background(), constants.DefaultRequestTimeout)
if _, err := clustercli.MemberRemove(ctx, toRemove.ID); err != nil {
return err
return fmt.Errorf("etcdcli failed to remove one member: %v", err)
}
c.members.Remove(toRemove.Name)
if err := c.removePodAndService(toRemove.Name); err != nil {