Removing wrangler old reparent code.

Alain Jobart 2015-04-30 16:24:22 -07:00
Parent 2e819961fe
Commit 32aede0709
4 changed files: 61 additions and 687 deletions

View file

@@ -6,9 +6,6 @@ package wrangler
/*
This file handles the reparenting operations.
FIXME(alainjobart) a lot of this code is being replaced now.
The new code starts at InitShardMaster.
*/
import (
@@ -32,71 +29,14 @@ const (
emergencyReparentShardOperation = "EmergencyReparentShard"
)
// ReparentShard creates the reparenting action and launches a goroutine
// to coordinate the procedure.
//
// leaveMasterReadOnly: leave the master in read-only mode, even
// though all the other necessary updates have been made.
// forceReparentToCurrentMaster: mostly for test setups, this can
// cause data loss.
func (wr *Wrangler) ReparentShard(ctx context.Context, keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool, waitSlaveTimeout time.Duration) error {
// lock the shard
actionNode := actionnode.ReparentShard("", masterElectTabletAlias)
lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
if err != nil {
return err
}
// FIXME(alainjobart) rework this ShardReplicationStatuses function,
// it's clumsy
// do the work
err = wr.reparentShardLocked(ctx, keyspace, shard, masterElectTabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster, waitSlaveTimeout)
// and unlock
return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
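
For orientation, a minimal sketch of how this entry point might be driven from other wrangler code. Only the ReparentShard signature above is taken from the diff; the wrangler instance, context, keyspace, shard, alias values, and timeout are hypothetical.

	// Hypothetical caller, assuming an existing *Wrangler (wr) and a context (ctx).
	masterElect := topo.TabletAlias{Cell: "test_cell", Uid: 42} // hypothetical alias
	err := wr.ReparentShard(ctx, "test_keyspace", "0", masterElect,
		false /* leaveMasterReadOnly */, false /* forceReparentToCurrentMaster */,
		30*time.Second /* waitSlaveTimeout */)
	if err != nil {
		wr.Logger().Errorf("ReparentShard failed: %v", err)
	}
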
func (wr *Wrangler) reparentShardLocked(ctx context.Context, keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool, waitSlaveTimeout time.Duration) error {
shardInfo, err := wr.ts.GetShard(keyspace, shard)
if err != nil {
return err
}
tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
if err != nil {
return err
}
slaveTabletMap, masterTabletMap := topotools.SortedTabletMap(tabletMap)
if shardInfo.MasterAlias == masterElectTabletAlias && !forceReparentToCurrentMaster {
return fmt.Errorf("master-elect tablet %v is already master - specify -force to override", masterElectTabletAlias)
}
masterElectTablet, ok := tabletMap[masterElectTabletAlias]
if !ok {
return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, topotools.MapKeys(tabletMap))
}
// Create reusable Reparent event with available info
ev := &events.Reparent{
ShardInfo: *shardInfo,
NewMaster: *masterElectTablet.Tablet,
}
if oldMasterTablet, ok := tabletMap[shardInfo.MasterAlias]; ok {
ev.OldMaster = *oldMasterTablet.Tablet
}
if !shardInfo.MasterAlias.IsZero() && !forceReparentToCurrentMaster {
err = wr.reparentShardGraceful(ctx, ev, shardInfo, slaveTabletMap, masterTabletMap, masterElectTablet, leaveMasterReadOnly, waitSlaveTimeout)
} else {
err = wr.reparentShardBrutal(ctx, ev, shardInfo, slaveTabletMap, masterTabletMap, masterElectTablet, leaveMasterReadOnly, forceReparentToCurrentMaster, waitSlaveTimeout)
}
if err == nil {
// only log if it works, if it fails we'll show the error
wr.Logger().Infof("reparentShard finished")
}
return err
}
// helper struct to queue up results
type rpcContext struct {
tablet *topo.TabletInfo
status *myproto.ReplicationStatus
err error
}
// ShardReplicationStatuses returns the ReplicationStatus for each tablet in a shard.
@@ -128,6 +68,60 @@ func (wr *Wrangler) shardReplicationStatuses(ctx context.Context, shardInfo *top
return tablets, stats, err
}
// tabletReplicationStatuses returns the ReplicationStatus of each tablet in
// tablets.
func (wr *Wrangler) tabletReplicationStatuses(ctx context.Context, tablets []*topo.TabletInfo) ([]*myproto.ReplicationStatus, error) {
wr.logger.Infof("tabletReplicationStatuses: %v", tablets)
calls := make([]*rpcContext, len(tablets))
wg := sync.WaitGroup{}
f := func(idx int) {
defer wg.Done()
ti := tablets[idx]
rpcCtx := &rpcContext{tablet: ti}
calls[idx] = rpcCtx
if ti.Type == topo.TYPE_MASTER {
pos, err := wr.tmc.MasterPosition(ctx, ti)
rpcCtx.err = err
if err == nil {
rpcCtx.status = &myproto.ReplicationStatus{Position: pos}
}
} else if ti.IsSlaveType() {
rpcCtx.status, rpcCtx.err = wr.tmc.SlaveStatus(ctx, ti)
}
}
for i, tablet := range tablets {
// Don't scan tablets that won't return something useful. Otherwise, you'll
// end up waiting for a timeout.
if tablet.Type == topo.TYPE_MASTER || tablet.IsSlaveType() {
wg.Add(1)
go f(i)
} else {
wr.logger.Infof("tabletReplicationPositions: skipping tablet %v type %v", tablet.Alias, tablet.Type)
}
}
wg.Wait()
someErrors := false
stats := make([]*myproto.ReplicationStatus, len(tablets))
for i, rpcCtx := range calls {
if rpcCtx == nil {
continue
}
if rpcCtx.err != nil {
wr.logger.Warningf("could not get replication status for tablet %v %v", rpcCtx.tablet.Alias, rpcCtx.err)
someErrors = true
} else {
stats[i] = rpcCtx.status
}
}
if someErrors {
return stats, fmt.Errorf("partial position map, some errors")
}
return stats, nil
}
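
As a quick orientation for in-package callers, a sketch of how the returned slice lines up with the input: stats[i] corresponds to tablets[i], entries stay nil for tablets that were skipped or whose RPC failed, and the error is non-nil even for partial results. The wr, ctx, and tablets variables are assumed to already be in scope; this is not part of the commit.

	// Sketch only: consume the per-tablet statuses returned above.
	stats, err := wr.tabletReplicationStatuses(ctx, tablets)
	if err != nil {
		// Partial results are still usable; inspect the non-nil entries.
		wr.logger.Warningf("partial replication statuses: %v", err)
	}
	for i, status := range stats {
		if status == nil {
			continue // skipped tablet type or failed RPC
		}
		wr.logger.Infof("%v at %v", tablets[i].Alias, status.Position)
	}
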
// ReparentTablet tells a tablet to reparent this tablet to the current
// master, based on the current replication position. If there is no
// match, it will fail.

View file

@@ -1,379 +0,0 @@
package wrangler
import (
"fmt"
"sort"
"strings"
"sync"
"time"
myproto "github.com/youtube/vitess/go/vt/mysqlctl/proto"
"github.com/youtube/vitess/go/vt/tabletmanager/actionnode"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"golang.org/x/net/context"
)
// helper struct to queue up results
type rpcContext struct {
tablet *topo.TabletInfo
status *myproto.ReplicationStatus
err error
}
// Check all the tablets replication positions to find if some
// will have a problem, and suggest a fix for them.
func (wr *Wrangler) checkSlaveReplication(ctx context.Context, tabletMap map[topo.TabletAlias]*topo.TabletInfo, masterTabletUID uint32, waitSlaveTimeout time.Duration) error {
wr.logger.Infof("Checking all replication positions will allow the transition:")
masterIsDead := masterTabletUID == topo.NO_TABLET
// now check all the replication positions will allow us to proceed
if masterIsDead {
wr.logger.Infof(" master is dead, not checking Seconds Behind Master value")
}
var lastError error
mutex := sync.Mutex{}
wg := sync.WaitGroup{}
for _, tablet := range tabletMap {
wg.Add(1)
go func(tablet *topo.TabletInfo) {
defer wg.Done()
var err error
defer func() {
if err != nil {
mutex.Lock()
lastError = err
mutex.Unlock()
}
}()
if tablet.Type == topo.TYPE_LAG {
wr.logger.Infof(" skipping slave position check for %v tablet %v", tablet.Type, tablet.Alias)
return
}
status, err := wr.tmc.SlaveStatus(ctx, tablet)
if err != nil {
if tablet.Type == topo.TYPE_BACKUP {
wr.logger.Warningf(" failed to get slave position from backup tablet %v, either wait for backup to finish or scrap tablet (%v)", tablet.Alias, err)
} else {
wr.logger.Warningf(" failed to get slave position from %v: %v", tablet.Alias, err)
}
return
}
if !masterIsDead {
if !status.SlaveRunning() {
err = fmt.Errorf("slave %v is not replicating (Slave_IO or Slave_SQL not running), can't complete reparent in time", tablet.Alias)
wr.logger.Errorf(" %v", err)
return
}
var dur = time.Second * time.Duration(status.SecondsBehindMaster)
if dur > waitSlaveTimeout {
err = fmt.Errorf("slave is too far behind to complete reparent in time (%v>%v), either increase timeout using 'vtctl ReparentShard -wait_slave_timeout=XXX ...' or scrap tablet %v", dur, waitSlaveTimeout, tablet.Alias)
wr.logger.Errorf(" %v", err)
return
}
wr.logger.Infof(" slave is %v behind master (<%v), reparent should work for %v", dur, waitSlaveTimeout, tablet.Alias)
}
}(tablet)
}
wg.Wait()
return lastError
}
// Check all the tablets to see if we can proceed with reparenting.
// masterPosition is supplied from the demoted master if we are doing
// this gracefully.
func (wr *Wrangler) checkSlaveConsistency(ctx context.Context, tabletMap map[uint32]*topo.TabletInfo, masterPosition myproto.ReplicationPosition, waitSlaveTimeout time.Duration) error {
wr.logger.Infof("checkSlaveConsistency %v %#v", topotools.MapKeys(tabletMap), masterPosition)
// FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it.
calls := make(chan *rpcContext, len(tabletMap))
f := func(ti *topo.TabletInfo) {
rpcCtx := &rpcContext{tablet: ti}
defer func() {
calls <- rpcCtx
}()
if !masterPosition.IsZero() {
// If the master position is known, do our best to wait for replication to catch up.
status, err := wr.tmc.WaitSlavePosition(ctx, ti, masterPosition, waitSlaveTimeout)
if err != nil {
rpcCtx.err = err
return
}
rpcCtx.status = status
} else {
// If the master is down, just get the slave status.
status, err := wr.tmc.SlaveStatus(ctx, ti)
if err != nil {
rpcCtx.err = err
return
}
rpcCtx.status = status
}
}
for _, tablet := range tabletMap {
// Pass loop variable explicitly so we don't have a concurrency issue.
go f(tablet)
}
// map positions to tablets
positionMap := make(map[string][]uint32)
for i := 0; i < len(tabletMap); i++ {
rpcCtx := <-calls
mapKey := "unavailable-tablet-error"
if rpcCtx.err == nil {
mapKey = rpcCtx.status.Position.String()
}
if _, ok := positionMap[mapKey]; !ok {
positionMap[mapKey] = make([]uint32, 0, 32)
}
positionMap[mapKey] = append(positionMap[mapKey], rpcCtx.tablet.Alias.Uid)
}
if len(positionMap) == 1 {
// great, everyone agrees
// demotedMasterReplicationState is nil if demotion failed
if !masterPosition.IsZero() {
demotedMapKey := masterPosition.String()
if _, ok := positionMap[demotedMapKey]; !ok {
for slaveMapKey := range positionMap {
return fmt.Errorf("slave position doesn't match demoted master: %v != %v", demotedMapKey,
slaveMapKey)
}
}
}
} else {
// FIXME(msolomon) in the event of a crash, do you pick replica that is
// furthest along or do you promote the majority? data loss vs availability
// sounds like you pick the latest group and reclone.
items := make([]string, 0, 32)
for slaveMapKey, uids := range positionMap {
tabletPaths := make([]string, len(uids))
for i, uid := range uids {
tabletPaths[i] = tabletMap[uid].Alias.String()
}
items = append(items, fmt.Sprintf(" %v\n %v", slaveMapKey, strings.Join(tabletPaths, "\n ")))
}
sort.Strings(items)
return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(items, "\n"))
}
return nil
}
// Shut off all replication.
func (wr *Wrangler) stopSlaves(ctx context.Context, tabletMap map[topo.TabletAlias]*topo.TabletInfo) error {
errs := make(chan error, len(tabletMap))
f := func(ti *topo.TabletInfo) {
err := wr.tmc.StopSlave(ctx, ti)
if err != nil {
wr.logger.Infof("StopSlave failed: %v", err)
}
errs <- err
}
for _, tablet := range tabletMap {
// Pass loop variable explicitly so we don't have a concurrency issue.
go f(tablet)
}
// wait for responses
for i := 0; i < len(tabletMap); i++ {
if err := <-errs; err != nil {
return err
}
}
return nil
}
// tabletReplicationStatuses returns the ReplicationStatus of each tablet in
// tablets. It handles masters and slaves, but it's up to the caller to
// guarantee all tablets are in the same shard.
func (wr *Wrangler) tabletReplicationStatuses(ctx context.Context, tablets []*topo.TabletInfo) ([]*myproto.ReplicationStatus, error) {
wr.logger.Infof("tabletReplicationStatuses: %v", tablets)
calls := make([]*rpcContext, len(tablets))
wg := sync.WaitGroup{}
f := func(idx int) {
defer wg.Done()
ti := tablets[idx]
rpcCtx := &rpcContext{tablet: ti}
calls[idx] = rpcCtx
if ti.Type == topo.TYPE_MASTER {
pos, err := wr.tmc.MasterPosition(ctx, ti)
rpcCtx.err = err
if err == nil {
rpcCtx.status = &myproto.ReplicationStatus{Position: pos}
}
} else if ti.IsSlaveType() {
rpcCtx.status, rpcCtx.err = wr.tmc.SlaveStatus(ctx, ti)
}
}
for i, tablet := range tablets {
// Don't scan tablets that won't return something useful. Otherwise, you'll
// end up waiting for a timeout.
if tablet.Type == topo.TYPE_MASTER || tablet.IsSlaveType() {
wg.Add(1)
go f(i)
} else {
wr.logger.Infof("tabletReplicationPositions: skipping tablet %v type %v", tablet.Alias, tablet.Type)
}
}
wg.Wait()
someErrors := false
stats := make([]*myproto.ReplicationStatus, len(tablets))
for i, rpcCtx := range calls {
if rpcCtx == nil {
continue
}
if rpcCtx.err != nil {
wr.logger.Warningf("could not get replication status for tablet %v %v", rpcCtx.tablet.Alias, rpcCtx.err)
someErrors = true
} else {
stats[i] = rpcCtx.status
}
}
if someErrors {
return stats, fmt.Errorf("partial position map, some errors")
}
return stats, nil
}
func (wr *Wrangler) demoteMaster(ctx context.Context, ti *topo.TabletInfo) (myproto.ReplicationPosition, error) {
wr.logger.Infof("demote master %v", ti.Alias)
return wr.tmc.DemoteMaster(ctx, ti)
}
func (wr *Wrangler) promoteSlave(ctx context.Context, ti *topo.TabletInfo) (rsd *actionnode.RestartSlaveData, err error) {
wr.logger.Infof("promote slave %v", ti.Alias)
return wr.tmc.PromoteSlave(ctx, ti)
}
func (wr *Wrangler) restartSlaves(ctx context.Context, slaveTabletMap map[topo.TabletAlias]*topo.TabletInfo, rsd *actionnode.RestartSlaveData) (majorityRestart bool, err error) {
wg := new(sync.WaitGroup)
slaves := topotools.CopyMapValues(slaveTabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo)
errs := make([]error, len(slaveTabletMap))
f := func(i int) {
errs[i] = wr.restartSlave(ctx, slaves[i], rsd)
if errs[i] != nil {
// FIXME(msolomon) Don't bail early, just mark this phase as
// failed. We might decide to proceed if enough of these
// succeed.
//
// FIXME(msolomon) This is a somewhat delicate retry - have to
// figure out why it failed on the tablet end. This could lead
// to a nasty case of having to recompute where to start
// replication. Practically speaking, that chance is pretty low.
wr.logger.Warningf("restart slave failed: %v %v", slaves[i].Alias, errs[i])
}
wg.Done()
}
for i := range slaves {
wg.Add(1)
go f(i)
}
wg.Wait()
errCount := 0
badTablets := make([]string, 0, 16)
for i, err := range errs {
if err != nil {
errCount++
badTablets = append(badTablets, slaves[i].Alias.String())
}
}
// Phrase the question with multiplication so we don't get caught by int
// division rounding.
majorityRestart = errCount*2 < len(slaveTabletMap)
if errCount > 0 {
err = fmt.Errorf("restart slave failed on some tablets (%v): %v", errCount, strings.Join(badTablets, ", "))
}
return
}
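
A standalone illustration of why the majority test above is phrased with multiplication rather than integer division; the counts are made up.

	package main

	import "fmt"

	func main() {
		// Made-up counts: 3 slaves, 1 restart failure, so 2 of 3 restarted (a majority).
		total, errCount := 3, 1
		fmt.Println(errCount*2 < total) // true: the multiplication form sees the majority
		fmt.Println(errCount < total/2) // false: integer division rounds 3/2 down to 1
	}
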
func (wr *Wrangler) restartSlave(ctx context.Context, ti *topo.TabletInfo, rsd *actionnode.RestartSlaveData) (err error) {
wr.logger.Infof("restart slave %v", ti.Alias)
return wr.tmc.RestartSlave(ctx, ti, rsd)
}
func (wr *Wrangler) checkMasterElect(ctx context.Context, ti *topo.TabletInfo) error {
// Check the master-elect is fit for duty - try to ping it.
// if the server was already serving live traffic, it's probably good
if ti.IsInServingGraph() {
return nil
}
return wr.tmc.Ping(ctx, ti)
}
func (wr *Wrangler) finishReparent(ctx context.Context, si *topo.ShardInfo, masterElect *topo.TabletInfo, majorityRestart, leaveMasterReadOnly bool) error {
// If the majority of slaves restarted, move ahead.
if majorityRestart {
if leaveMasterReadOnly {
wr.logger.Warningf("leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
} else {
wr.logger.Infof("marking master-elect read-write %v", masterElect.Alias)
if err := wr.tmc.SetReadWrite(ctx, masterElect); err != nil {
wr.logger.Warningf("master master-elect read-write failed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
}
}
} else {
wr.logger.Warningf("minority reparent, manual fixes are needed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
}
// save the new master in the shard info
si.MasterAlias = masterElect.Alias
if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
wr.logger.Errorf("Failed to save new master into shard: %v", err)
return err
}
// We rebuild all the cells, as we may have taken tablets in and
// out of the graph.
wr.logger.Infof("rebuilding shard serving graph data")
_, err := wr.RebuildShardGraph(ctx, masterElect.Keyspace, masterElect.Shard, nil)
return err
}
func (wr *Wrangler) breakReplication(ctx context.Context, slaveMap map[topo.TabletAlias]*topo.TabletInfo, masterElect *topo.TabletInfo) error {
// We are forcing a reparenting. Make sure that all slaves stop so
// no data is accidentally replicated through before we call RestartSlave.
wr.logger.Infof("stop slaves %v", masterElect.Alias)
err := wr.stopSlaves(ctx, slaveMap)
if err != nil {
return err
}
// Force slaves to break, just in case they were not advertised in
// the replication graph.
wr.logger.Infof("break slaves %v", masterElect.Alias)
return wr.tmc.BreakSlaves(ctx, masterElect)
}
func (wr *Wrangler) restartableTabletMap(slaves map[topo.TabletAlias]*topo.TabletInfo) map[uint32]*topo.TabletInfo {
// Under normal circumstances, prune out lag as not restartable.
// These types are explicitly excluded from reparenting since you
// will just wait forever for them to catch up. A possible
// improvement is waiting for the io thread to reach the same
// position as the sql thread on a normal slave.
tabletMap := make(map[uint32]*topo.TabletInfo)
for _, ti := range slaves {
if ti.Type != topo.TYPE_LAG {
tabletMap[ti.Alias.Uid] = ti
} else {
wr.logger.Infof("skipping reparent action for tablet %v %v", ti.Type, ti.Alias)
}
}
return tabletMap
}

View file

@@ -1,110 +0,0 @@
package wrangler
import (
"fmt"
"time"
"github.com/youtube/vitess/go/event"
myproto "github.com/youtube/vitess/go/vt/mysqlctl/proto"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"github.com/youtube/vitess/go/vt/topotools/events"
"golang.org/x/net/context"
)
// reparentShardBrutal executes a brutal reparent.
//
// Assume the master is dead and not coming back. Just push your way
// forward. Force means we are reparenting to the same master
// (assuming the data has been externally synched).
//
// The ev parameter is an event struct prefilled with information that the
// caller has on hand, which would be expensive for us to re-query.
func (wr *Wrangler) reparentShardBrutal(ctx context.Context, ev *events.Reparent, si *topo.ShardInfo, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly, force bool, waitSlaveTimeout time.Duration) (err error) {
event.DispatchUpdate(ev, "starting brutal")
defer func() {
if err != nil {
event.DispatchUpdate(ev, "failed: "+err.Error())
}
}()
wr.logger.Infof("Skipping ValidateShard - not a graceful situation")
if _, ok := slaveTabletMap[masterElectTablet.Alias]; !ok && !force {
return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias, si.Keyspace(), si.ShardName(), topotools.MapKeys(slaveTabletMap))
}
// Check the master-elect and slaves are in good shape when the action
// has not been forced.
if !force {
// Make sure all tablets have the right parent and reasonable positions.
event.DispatchUpdate(ev, "checking slave replication positions")
if err := wr.checkSlaveReplication(ctx, slaveTabletMap, topo.NO_TABLET, waitSlaveTimeout); err != nil {
return err
}
// Check the master-elect is fit for duty - call out for hardware checks.
event.DispatchUpdate(ev, "checking that new master is ready to serve")
if err := wr.checkMasterElect(ctx, masterElectTablet); err != nil {
return err
}
event.DispatchUpdate(ev, "checking slave consistency")
wr.logger.Infof("check slaves %v/%v", masterElectTablet.Keyspace, masterElectTablet.Shard)
restartableSlaveTabletMap := wr.restartableTabletMap(slaveTabletMap)
err = wr.checkSlaveConsistency(ctx, restartableSlaveTabletMap, myproto.ReplicationPosition{}, waitSlaveTimeout)
if err != nil {
return err
}
} else {
event.DispatchUpdate(ev, "stopping slave replication")
wr.logger.Infof("forcing reparent to same master %v", masterElectTablet.Alias)
err := wr.breakReplication(ctx, slaveTabletMap, masterElectTablet)
if err != nil {
return err
}
}
event.DispatchUpdate(ev, "promoting new master")
rsd, err := wr.promoteSlave(ctx, masterElectTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master-elect is dead.
// We need to classify certain errors as temporary and retry.
return fmt.Errorf("promote slave failed: %v %v", err, masterElectTablet.Alias)
}
// Once the slave is promoted, remove it from our maps
delete(slaveTabletMap, masterElectTablet.Alias)
delete(masterTabletMap, masterElectTablet.Alias)
event.DispatchUpdate(ev, "restarting slaves")
majorityRestart, restartSlaveErr := wr.restartSlaves(ctx, slaveTabletMap, rsd)
if !force {
for _, failedMaster := range masterTabletMap {
event.DispatchUpdate(ev, "scrapping old master")
wr.logger.Infof("scrap dead master %v", failedMaster.Alias)
// The master is dead so execute the action locally instead of
// enqueuing the scrap action for an arbitrary amount of time.
if scrapErr := topotools.Scrap(ctx, wr.ts, failedMaster.Alias, false); scrapErr != nil {
wr.logger.Warningf("scrapping failed master failed: %v", scrapErr)
}
}
}
event.DispatchUpdate(ev, "rebuilding shard serving graph")
err = wr.finishReparent(ctx, si, masterElectTablet, majorityRestart, leaveMasterReadOnly)
if err != nil {
return err
}
event.DispatchUpdate(ev, "finished")
if restartSlaveErr != nil {
// This is more of a warning at this point.
return restartSlaveErr
}
return nil
}

View file

@@ -1,131 +0,0 @@
// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package wrangler
import (
"fmt"
"strings"
"time"
"github.com/youtube/vitess/go/event"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"github.com/youtube/vitess/go/vt/topotools/events"
"golang.org/x/net/context"
)
// reparentShardGraceful executes a graceful reparent.
// The ev parameter is an event struct prefilled with information that the
// caller has on hand, which would be expensive for us to re-query.
func (wr *Wrangler) reparentShardGraceful(ctx context.Context, ev *events.Reparent, si *topo.ShardInfo, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly bool, waitSlaveTimeout time.Duration) (err error) {
event.DispatchUpdate(ev, "starting graceful")
defer func() {
if err != nil {
event.DispatchUpdate(ev, "failed: "+err.Error())
}
}()
// Validate a bunch of assumptions we make about the replication graph.
if len(masterTabletMap) != 1 {
aliases := make([]string, 0, len(masterTabletMap))
for _, v := range masterTabletMap {
aliases = append(aliases, v.String())
}
return fmt.Errorf("I have 0 or multiple masters / scrapped tablets in this shard replication graph, please scrap the non-master ones: %v", strings.Join(aliases, " "))
}
var masterTablet *topo.TabletInfo
for _, v := range masterTabletMap {
masterTablet = v
}
if masterTablet.Type != topo.TYPE_MASTER {
return fmt.Errorf("master tablet should not be type: %v %v", masterTablet.Type, masterTablet.Alias)
}
if masterTablet.Alias.Uid == masterElectTablet.Alias.Uid {
return fmt.Errorf("master tablet should not match master elect - this must be forced: %v", masterTablet.Alias)
}
if _, ok := slaveTabletMap[masterElectTablet.Alias]; !ok {
return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias, masterTablet.Keyspace, masterTablet.Shard, topotools.MapKeys(slaveTabletMap))
}
if err := wr.ValidateShard(ctx, masterTablet.Keyspace, masterTablet.Shard, true); err != nil {
return fmt.Errorf("ValidateShard verification failed: %v, if the master is dead, run: vtctl ScrapTablet -force %v", err, masterTablet.Alias)
}
// Make sure all tablets have the right parent and reasonable positions.
event.DispatchUpdate(ev, "checking slave replication positions")
err = wr.checkSlaveReplication(ctx, slaveTabletMap, masterTablet.Alias.Uid, waitSlaveTimeout)
if err != nil {
return err
}
// Check the master-elect is fit for duty - call out for hardware checks.
event.DispatchUpdate(ev, "checking that new master is ready to serve")
err = wr.checkMasterElect(ctx, masterElectTablet)
if err != nil {
return err
}
event.DispatchUpdate(ev, "demoting old master")
masterPosition, err := wr.demoteMaster(ctx, masterTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master is dead and we
// need to take steps. We could either pop a prompt, or make
// retrying the action painless.
return fmt.Errorf("demote master failed: %v, if the master is dead, run: vtctl -force ScrapTablet %v", err, masterTablet.Alias)
}
event.DispatchUpdate(ev, "checking slave consistency")
wr.logger.Infof("check slaves %v/%v", masterTablet.Keyspace, masterTablet.Shard)
restartableSlaveTabletMap := wr.restartableTabletMap(slaveTabletMap)
err = wr.checkSlaveConsistency(ctx, restartableSlaveTabletMap, masterPosition, waitSlaveTimeout)
if err != nil {
return fmt.Errorf("check slave consistency failed %v, demoted master is still read only, run: vtctl SetReadWrite %v", err, masterTablet.Alias)
}
event.DispatchUpdate(ev, "promoting new master")
rsd, err := wr.promoteSlave(ctx, masterElectTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master-elect is dead.
// We need to classify certain errors as temporary and retry.
return fmt.Errorf("promote slave failed: %v, demoted master is still read only: vtctl SetReadWrite %v", err, masterTablet.Alias)
}
// Once the slave is promoted, remove it from our map
delete(slaveTabletMap, masterElectTablet.Alias)
event.DispatchUpdate(ev, "restarting slaves")
majorityRestart, restartSlaveErr := wr.restartSlaves(ctx, slaveTabletMap, rsd)
// For now, scrap the old master regardless of how many
// slaves restarted.
//
// FIXME(msolomon) We could reintroduce it and reparent it and use
// it as new replica.
event.DispatchUpdate(ev, "scrapping old master")
wr.logger.Infof("scrap demoted master %v", masterTablet.Alias)
if scrapErr := wr.tmc.Scrap(ctx, masterTablet); scrapErr != nil {
// The sub action is non-critical, so just warn.
wr.logger.Warningf("scrap demoted master failed: %v", scrapErr)
}
event.DispatchUpdate(ev, "rebuilding shard serving graph")
err = wr.finishReparent(ctx, si, masterElectTablet, majorityRestart, leaveMasterReadOnly)
if err != nil {
return err
}
event.DispatchUpdate(ev, "finished")
if restartSlaveErr != nil {
// This is more of a warning at this point.
return restartSlaveErr
}
return nil
}