Merge pull request #175 from hongchaodeng/m
cluster: retry on add member timeout
This commit is contained in:
Коммит
10617be061
|
@ -123,7 +123,10 @@ func (c *Cluster) run() {
|
|||
running.Add(&etcdutil.Member{Name: name})
|
||||
}
|
||||
if err := c.reconcile(running); err != nil {
|
||||
panic(err)
|
||||
log.Errorf("fail to reconcile: %v", err)
|
||||
if !isErrTransient(err) {
|
||||
log.Fatalf("unexpected error from reconciling: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -387,3 +390,12 @@ func (c *Cluster) pollPods() ([]string, []string, error) {
|
|||
ready, unready := k8sutil.SliceReadyAndUnreadyPods(podList)
|
||||
return ready, unready, nil
|
||||
}
|
||||
|
||||
func isErrTransient(err error) bool {
|
||||
switch err {
|
||||
case errTimeoutAddMember:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package cluster
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
@ -15,6 +16,10 @@ import (
|
|||
"k8s.io/kubernetes/pkg/util/wait"
|
||||
)
|
||||
|
||||
var (
|
||||
errTimeoutAddMember = errors.New("timeout to add etcd member")
|
||||
)
|
||||
|
||||
// reconcile reconciles
|
||||
// - the members in the cluster view with running pods in Kubernetes.
|
||||
// - the members and the expected size of the cluster.
|
||||
|
@ -108,11 +113,11 @@ func (c *Cluster) addOneMember() error {
|
|||
newMember := &etcdutil.Member{Name: newMemberName}
|
||||
var id uint64
|
||||
// Could have "unhealthy cluster" due to 5 second strict check. Retry.
|
||||
err = wait.Poll(1*time.Second, 20*time.Second, func() (done bool, err error) {
|
||||
err = wait.Poll(2*time.Second, 20*time.Second, func() (done bool, err error) {
|
||||
ctx, _ := context.WithTimeout(context.Background(), constants.DefaultRequestTimeout)
|
||||
resp, err := etcdcli.MemberAdd(ctx, []string{newMember.PeerAddr()})
|
||||
if err != nil {
|
||||
if err == rpctypes.ErrUnhealthy {
|
||||
if err == rpctypes.ErrUnhealthy || err == context.DeadlineExceeded {
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("etcdcli failed to add one member: %v", err)
|
||||
|
@ -121,6 +126,10 @@ func (c *Cluster) addOneMember() error {
|
|||
return true, nil
|
||||
})
|
||||
if err != nil {
|
||||
if err == wait.ErrWaitTimeout {
|
||||
err = errTimeoutAddMember
|
||||
}
|
||||
log.Errorf("fail to add new member (%s): %v", newMember.Name, err)
|
||||
return err
|
||||
}
|
||||
newMember.ID = id
|
||||
|
|
|
@ -197,7 +197,7 @@ func CreateAndWaitPVC(kubecli *unversioned.Client, clusterName, ns string, volum
|
|||
return nil, err
|
||||
}
|
||||
|
||||
err = wait.Poll(2*time.Second, 20*time.Second, func() (bool, error) {
|
||||
err = wait.Poll(2*time.Second, 10*time.Second, func() (bool, error) {
|
||||
claim, err := kubecli.PersistentVolumeClaims(ns).Get(retClaim.Name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
|
|
Загрузка…
Ссылка в новой задаче