Merge pull request #175 from hongchaodeng/m

cluster: retry on add member timeout
This commit is contained in:
Hongchao Deng 2016-10-04 19:28:43 -07:00 коммит произвёл GitHub
Родитель 8c5ab037f6 d08f199a07
Коммит 10617be061
3 изменённых файлов: 25 добавлений и 4 удалений

Просмотреть файл

@ -123,7 +123,10 @@ func (c *Cluster) run() {
running.Add(&etcdutil.Member{Name: name})
}
if err := c.reconcile(running); err != nil {
panic(err)
log.Errorf("fail to reconcile: %v", err)
if !isErrTransient(err) {
log.Fatalf("unexpected error from reconciling: %v", err)
}
}
}
}
@ -387,3 +390,12 @@ func (c *Cluster) pollPods() ([]string, []string, error) {
ready, unready := k8sutil.SliceReadyAndUnreadyPods(podList)
return ready, unready, nil
}
// isErrTransient reports whether err is one of the errors this package treats
// as transient. At present the only such error is errTimeoutAddMember; every
// other value, including nil, yields false.
func isErrTransient(err error) bool {
	return err == errTimeoutAddMember
}

Просмотреть файл

@ -1,6 +1,7 @@
package cluster
import (
"errors"
"fmt"
"net/http"
"time"
@ -15,6 +16,10 @@ import (
"k8s.io/kubernetes/pkg/util/wait"
)
// errTimeoutAddMember is the sentinel error reported when adding a new etcd
// member does not complete before the polling deadline expires.
var errTimeoutAddMember = errors.New("timeout to add etcd member")
// reconcile reconciles
// - the members in the cluster view with running pods in Kubernetes.
// - the members and expected size of the cluster.
@ -108,11 +113,11 @@ func (c *Cluster) addOneMember() error {
newMember := &etcdutil.Member{Name: newMemberName}
var id uint64
// Could have "unhealthy cluster" due to 5 second strict check. Retry.
err = wait.Poll(1*time.Second, 20*time.Second, func() (done bool, err error) {
err = wait.Poll(2*time.Second, 20*time.Second, func() (done bool, err error) {
ctx, _ := context.WithTimeout(context.Background(), constants.DefaultRequestTimeout)
resp, err := etcdcli.MemberAdd(ctx, []string{newMember.PeerAddr()})
if err != nil {
if err == rpctypes.ErrUnhealthy {
if err == rpctypes.ErrUnhealthy || err == context.DeadlineExceeded {
return false, nil
}
return false, fmt.Errorf("etcdcli failed to add one member: %v", err)
@ -121,6 +126,10 @@ func (c *Cluster) addOneMember() error {
return true, nil
})
if err != nil {
if err == wait.ErrWaitTimeout {
err = errTimeoutAddMember
}
log.Errorf("fail to add new member (%s): %v", newMember.Name, err)
return err
}
newMember.ID = id

Просмотреть файл

@ -197,7 +197,7 @@ func CreateAndWaitPVC(kubecli *unversioned.Client, clusterName, ns string, volum
return nil, err
}
err = wait.Poll(2*time.Second, 20*time.Second, func() (bool, error) {
err = wait.Poll(2*time.Second, 10*time.Second, func() (bool, error) {
claim, err := kubecli.PersistentVolumeClaims(ns).Get(retClaim.Name)
if err != nil {
return false, err