Merge pull request #8840 from sonne5/ywu/vtgr

Improve determinism of bootstrap
This commit is contained in:
Deepthi Sigireddi 2021-09-23 13:41:42 -07:00 коммит произвёл GitHub
Родитель 8bfcf5c9a3 a842ee51d5
Коммит 8e04a16681
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 59 добавлений и 10 удалений

Просмотреть файл

@ -20,6 +20,7 @@ import (
"errors"
"flag"
"fmt"
"sort"
"strconv"
"sync"
"time"
@ -144,6 +145,9 @@ func (shard *GRShard) repairShardHasNoGroupAction(ctx context.Context) error {
return errors.New("unsafe to bootstrap group")
}
var candidate *grInstance
sort.SliceStable(replicas, func(i, j int) bool {
return replicas[i].alias < replicas[j].alias
})
for _, replica := range replicas {
if !shard.shardStatusCollector.isUnreachable(replica) {
candidate = replica
@ -291,7 +295,11 @@ func (shard *GRShard) stopAndRebootstrap(ctx context.Context) error {
if err := shard.checkShardLocked(ctx); err != nil {
return err
}
return shard.dbAgent.BootstrapGroupLocked(candidate.instanceKey)
uuid := shard.sqlGroup.GetGroupName()
if uuid == "" {
return errors.New("trying to rebootstrap without uuid")
}
return shard.dbAgent.RebootstrapGroupLocked(candidate.instanceKey, uuid)
}
func (shard *GRShard) getGTIDSetFromAll(skipPrimary bool) (*groupGTIDRecorder, *concurrency.AllErrorRecorder, error) {

Просмотреть файл

@ -234,6 +234,17 @@ func TestRepairShardHasInactiveGroup(t *testing.T) {
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""},
}, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA},
}},
{"shard has inactive group and partial group name", "", testPort0, []data{
{alias0, testHost, testPort0, "", []db.TestGroupState{
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""},
}, true, getMysql56GTIDSet(sid1, "1-10"), topodatapb.TabletType_REPLICA},
{alias1, testHost, testPort1, "", []db.TestGroupState{
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""},
}, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_MASTER},
{alias2, testHost, testPort2, "group", []db.TestGroupState{
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""},
}, true, getMysql56GTIDSet(sid1, "1-9"), topodatapb.TabletType_REPLICA},
}},
{"unreachable rebootstrap candidate", "vtgr repair: test_cell-0000000000 is unreachable", 0, []data{
{alias0, testHost, testPort0, "group", []db.TestGroupState{
{MemberHost: "", MemberPort: "NULL", MemberState: "OFFLINE", MemberRole: ""},
@ -344,9 +355,9 @@ func TestRepairShardHasInactiveGroup(t *testing.T) {
var lock sync.Mutex
dbAgent.
EXPECT().
// RepairShardHasNoGroup is fixed by calling BootstrapGroupLocked
BootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}).
DoAndReturn(func(target *inst.InstanceKey) error {
// RepairShardHasNoGroup is fixed by calling RebootstrapGroupLocked
RebootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}, gomock.Any()).
DoAndReturn(func(target *inst.InstanceKey, name string) error {
if target.Hostname == "" || target.Port == 0 {
return errors.New("invalid mysql instance key")
}
@ -369,6 +380,9 @@ func TestRepairShardHasInactiveGroup(t *testing.T) {
}
}
inputMap[target.Port] = input
if name != "group" {
return errors.New("unexpected group name")
}
return nil
}).
Times(expectedCalls)
@ -913,7 +927,7 @@ func TestRepairInsufficientGroupSize(t *testing.T) {
if tt.expectedCandidatePort != 0 {
dbAgent.
EXPECT().
SetSuperReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), true).
SetReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), true).
Return(nil).
Times(1)
}
@ -1023,7 +1037,7 @@ func TestRepairReadOnlyShard(t *testing.T) {
if tt.expectedCandidatePort != 0 {
dbAgent.
EXPECT().
SetSuperReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), false).
SetReadOnly(gomock.Eq(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}), false).
Return(nil).
Times(1)
}
@ -1129,8 +1143,8 @@ func TestRepairBackoffError(t *testing.T) {
var lock sync.Mutex
dbAgent.
EXPECT().
BootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}).
DoAndReturn(func(target *inst.InstanceKey) error {
RebootstrapGroupLocked(&inst.InstanceKey{Hostname: testHost, Port: tt.expectedCandidatePort}, "group").
DoAndReturn(func(target *inst.InstanceKey, name string) error {
if target.Hostname == "" || target.Port == 0 {
return errors.New("invalid mysql instance key")
}

Просмотреть файл

@ -63,6 +63,20 @@ func (mr *MockAgentMockRecorder) BootstrapGroupLocked(instanceKey interface{}) *
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "BootstrapGroupLocked", reflect.TypeOf((*MockAgent)(nil).BootstrapGroupLocked), instanceKey)
}
// RebootstrapGroupLocked is the mock implementation of the Agent method of
// the same name. It forwards the call to the gomock controller and returns
// the first configured result as an error (nil if that result is not an error).
func (m *MockAgent) RebootstrapGroupLocked(instanceKey *inst.InstanceKey, name string) error {
	m.ctrl.T.Helper()
	results := m.ctrl.Call(m, "RebootstrapGroupLocked", instanceKey, name)
	// The type assertion deliberately ignores failure: a nil or non-error
	// result yields a nil error.
	err, _ := results[0].(error)
	return err
}
// RebootstrapGroupLocked registers an expected call of RebootstrapGroupLocked
// on the mock with the given arguments and returns the recorded *gomock.Call
// so further expectations can be chained onto it.
func (mr *MockAgentMockRecorder) RebootstrapGroupLocked(instanceKey, name interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	// Method type of the mocked function, handed to the controller together
	// with the method name when recording the call.
	methodType := reflect.TypeOf((*MockAgent)(nil).RebootstrapGroupLocked)
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RebootstrapGroupLocked", methodType, instanceKey, name)
}
// StopGroupLocked mocks base method
func (m *MockAgent) StopGroupLocked(instanceKey *inst.InstanceKey) error {
m.ctrl.T.Helper()
@ -99,8 +113,8 @@ func (m *MockAgent) SetReadOnly(instanceKey *inst.InstanceKey, readOnly bool) er
return ret0
}
// SetSuperReadOnly indicates an expected call of SetSuperReadOnly
func (mr *MockAgentMockRecorder) SetSuperReadOnly(instanceKey, readOnly interface{}) *gomock.Call {
// SetReadOnly registers an expected call of SetReadOnly on the mock with the
// given arguments and returns the recorded *gomock.Call so further
// expectations can be chained onto it.
func (mr *MockAgentMockRecorder) SetReadOnly(instanceKey, readOnly interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	// Method type of the mocked function, handed to the controller together
	// with the method name when recording the call.
	methodType := reflect.TypeOf((*MockAgent)(nil).SetReadOnly)
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetReadOnly", methodType, instanceKey, readOnly)
}

Просмотреть файл

@ -56,6 +56,9 @@ type Agent interface {
// the caller should grab a lock before
BootstrapGroupLocked(instanceKey *inst.InstanceKey) error
// RebootstrapGroupLocked rebootstraps a group with an existing name
RebootstrapGroupLocked(instanceKey *inst.InstanceKey, name string) error
// StopGroupLocked stops a mysql group
StopGroupLocked(instanceKey *inst.InstanceKey) error
@ -175,6 +178,15 @@ func (agent *SQLAgentImpl) BootstrapGroupLocked(instanceKey *inst.InstanceKey) e
log.Infof("Try to bootstrap with a new uuid")
}
log.Infof("Bootstrap group on %v with %v", instanceKey.Hostname, uuid)
return agent.bootstrapInternal(instanceKey, uuid)
}
// RebootstrapGroupLocked bootstraps the group on the given instance using the
// caller-supplied group name. It logs the target host and name, then hands
// both unchanged to bootstrapInternal and returns its error.
// NOTE(review): the "Locked" suffix suggests the caller must already hold the
// lock, as documented for BootstrapGroupLocked — confirm.
func (agent *SQLAgentImpl) RebootstrapGroupLocked(instanceKey *inst.InstanceKey, name string) error {
	host := instanceKey.Hostname
	log.Infof("Rebootstrapping group on %v with %v", host, name)
	return agent.bootstrapInternal(instanceKey, name)
}
func (agent *SQLAgentImpl) bootstrapInternal(instanceKey *inst.InstanceKey, uuid string) error {
// Use persist to set group_replication_group_name
// so that the instance will persist the name after restart
cmds := []string{
@ -188,6 +200,7 @@ func (agent *SQLAgentImpl) BootstrapGroupLocked(instanceKey *inst.InstanceKey) e
}
for _, cmd := range cmds {
if err := execInstanceWithTopo(instanceKey, cmd); err != nil {
log.Errorf("Failed to execute: %v: %v", cmd, err)
return err
}
}