Removing wrangler old reparent code.

Alain Jobart 2015-04-30 16:24:22 -07:00
Parent 2e819961fe
Commit 32aede0709
4 changed files: 61 additions and 687 deletions

View file

@@ -6,9 +6,6 @@ package wrangler
/*
This file handles the reparenting operations.
FIXME(alainjobart) a lot of this code is being replaced now.
The new code starts at InitShardMaster.
*/
import (
@@ -32,71 +29,14 @@ const (
emergencyReparentShardOperation = "EmergencyReparentShard"
)
// ReparentShard creates the reparenting action and launches a goroutine
// to coordinate the procedure.
//
// leaveMasterReadOnly: leave the master in read-only mode, even
// though all the other necessary updates have been made.
// forceReparentToCurrentMaster: mostly for test setups, this can
// cause data loss.
func (wr *Wrangler) ReparentShard(ctx context.Context, keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool, waitSlaveTimeout time.Duration) error {
// lock the shard
actionNode := actionnode.ReparentShard("", masterElectTabletAlias)
lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
if err != nil {
return err
}
// FIXME(alainjobart) rework this ShardReplicationStatuses function,
// it's clumsy
// do the work
err = wr.reparentShardLocked(ctx, keyspace, shard, masterElectTabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster, waitSlaveTimeout)
// and unlock
return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
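
For orientation, a minimal sketch of how this entry point might be driven from other wrangler code. Only the ReparentShard signature above is taken from the diff; the wrangler instance, context, keyspace, shard, alias values, and timeout are hypothetical.

	// Hypothetical caller, assuming an existing *Wrangler (wr) and a context (ctx).
	masterElect := topo.TabletAlias{Cell: "test_cell", Uid: 42} // hypothetical alias
	err := wr.ReparentShard(ctx, "test_keyspace", "0", masterElect,
		false /* leaveMasterReadOnly */, false /* forceReparentToCurrentMaster */,
		30*time.Second /* waitSlaveTimeout */)
	if err != nil {
		wr.Logger().Errorf("ReparentShard failed: %v", err)
	}
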
func (wr *Wrangler) reparentShardLocked(ctx context.Context, keyspace, shard string, masterElectTabletAlias topo.TabletAlias, leaveMasterReadOnly, forceReparentToCurrentMaster bool, waitSlaveTimeout time.Duration) error {
shardInfo, err := wr.ts.GetShard(keyspace, shard)
if err != nil {
return err
}
tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
if err != nil {
return err
}
slaveTabletMap, masterTabletMap := topotools.SortedTabletMap(tabletMap)
if shardInfo.MasterAlias == masterElectTabletAlias && !forceReparentToCurrentMaster {
return fmt.Errorf("master-elect tablet %v is already master - specify -force to override", masterElectTabletAlias)
}
masterElectTablet, ok := tabletMap[masterElectTabletAlias]
if !ok {
return fmt.Errorf("master-elect tablet %v not found in replication graph %v/%v %v", masterElectTabletAlias, keyspace, shard, topotools.MapKeys(tabletMap))
}
// Create reusable Reparent event with available info
ev := &events.Reparent{
ShardInfo: *shardInfo,
NewMaster: *masterElectTablet.Tablet,
}
if oldMasterTablet, ok := tabletMap[shardInfo.MasterAlias]; ok {
ev.OldMaster = *oldMasterTablet.Tablet
}
if !shardInfo.MasterAlias.IsZero() && !forceReparentToCurrentMaster {
err = wr.reparentShardGraceful(ctx, ev, shardInfo, slaveTabletMap, masterTabletMap, masterElectTablet, leaveMasterReadOnly, waitSlaveTimeout)
} else {
err = wr.reparentShardBrutal(ctx, ev, shardInfo, slaveTabletMap, masterTabletMap, masterElectTablet, leaveMasterReadOnly, forceReparentToCurrentMaster, waitSlaveTimeout)
}
if err == nil {
// only log if it works, if it fails we'll show the error
wr.Logger().Infof("reparentShard finished")
}
return err
}
// helper struct to queue up results
type rpcContext struct {
tablet *topo.TabletInfo
status *myproto.ReplicationStatus
err error
}
// ShardReplicationStatuses returns the ReplicationStatus for each tablet in a shard.
@@ -128,6 +68,60 @@ func (wr *Wrangler) shardReplicationStatuses(ctx context.Context, shardInfo *top
return tablets, stats, err
}
// tabletReplicationStatuses returns the ReplicationStatus of each tablet in
// tablets.
func (wr *Wrangler) tabletReplicationStatuses(ctx context.Context, tablets []*topo.TabletInfo) ([]*myproto.ReplicationStatus, error) {
wr.logger.Infof("tabletReplicationStatuses: %v", tablets)
calls := make([]*rpcContext, len(tablets))
wg := sync.WaitGroup{}
f := func(idx int) {
defer wg.Done()
ti := tablets[idx]
rpcCtx := &rpcContext{tablet: ti}
calls[idx] = rpcCtx
if ti.Type == topo.TYPE_MASTER {
pos, err := wr.tmc.MasterPosition(ctx, ti)
rpcCtx.err = err
if err == nil {
rpcCtx.status = &myproto.ReplicationStatus{Position: pos}
}
} else if ti.IsSlaveType() {
rpcCtx.status, rpcCtx.err = wr.tmc.SlaveStatus(ctx, ti)
}
}
for i, tablet := range tablets {
// Don't scan tablets that won't return something useful. Otherwise, you'll
// end up waiting for a timeout.
if tablet.Type == topo.TYPE_MASTER || tablet.IsSlaveType() {
wg.Add(1)
go f(i)
} else {
wr.logger.Infof("tabletReplicationPositions: skipping tablet %v type %v", tablet.Alias, tablet.Type)
}
}
wg.Wait()
someErrors := false
stats := make([]*myproto.ReplicationStatus, len(tablets))
for i, rpcCtx := range calls {
if rpcCtx == nil {
continue
}
if rpcCtx.err != nil {
wr.logger.Warningf("could not get replication status for tablet %v %v", rpcCtx.tablet.Alias, rpcCtx.err)
someErrors = true
} else {
stats[i] = rpcCtx.status
}
}
if someErrors {
return stats, fmt.Errorf("partial position map, some errors")
}
return stats, nil
}
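
As a quick orientation for in-package callers, a sketch of how the returned slice lines up with the input: stats[i] corresponds to tablets[i], entries stay nil for tablets that were skipped or whose RPC failed, and the error is non-nil even for partial results. The wr, ctx, and tablets variables are assumed to already be in scope; this is not part of the commit.

	// Sketch only: consume the per-tablet statuses returned above.
	stats, err := wr.tabletReplicationStatuses(ctx, tablets)
	if err != nil {
		// Partial results are still usable; inspect the non-nil entries.
		wr.logger.Warningf("partial replication statuses: %v", err)
	}
	for i, status := range stats {
		if status == nil {
			continue // skipped tablet type or failed RPC
		}
		wr.logger.Infof("%v at %v", tablets[i].Alias, status.Position)
	}
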
// ReparentTablet tells a tablet to reparent this tablet to the current
// master, based on the current replication position. If there is no
// match, it will fail.

View file

@@ -1,379 +0,0 @@
package wrangler
import (
"fmt"
"sort"
"strings"
"sync"
"time"
myproto "github.com/youtube/vitess/go/vt/mysqlctl/proto"
"github.com/youtube/vitess/go/vt/tabletmanager/actionnode"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"golang.org/x/net/context"
)
// helper struct to queue up results
type rpcContext struct {
tablet *topo.TabletInfo
status *myproto.ReplicationStatus
err error
}
// Check all the tablets replication positions to find if some
// will have a problem, and suggest a fix for them.
func (wr *Wrangler) checkSlaveReplication(ctx context.Context, tabletMap map[topo.TabletAlias]*topo.TabletInfo, masterTabletUID uint32, waitSlaveTimeout time.Duration) error {
wr.logger.Infof("Checking all replication positions will allow the transition:")
masterIsDead := masterTabletUID == topo.NO_TABLET
// now check all the replication positions will allow us to proceed
if masterIsDead {
wr.logger.Infof(" master is dead, not checking Seconds Behind Master value")
}
var lastError error
mutex := sync.Mutex{}
wg := sync.WaitGroup{}
for _, tablet := range tabletMap {
wg.Add(1)
go func(tablet *topo.TabletInfo) {
defer wg.Done()
var err error
defer func() {
if err != nil {
mutex.Lock()
lastError = err
mutex.Unlock()
}
}()
if tablet.Type == topo.TYPE_LAG {
wr.logger.Infof(" skipping slave position check for %v tablet %v", tablet.Type, tablet.Alias)
return
}
status, err := wr.tmc.SlaveStatus(ctx, tablet)
if err != nil {
if tablet.Type == topo.TYPE_BACKUP {
wr.logger.Warningf(" failed to get slave position from backup tablet %v, either wait for backup to finish or scrap tablet (%v)", tablet.Alias, err)
} else {
wr.logger.Warningf(" failed to get slave position from %v: %v", tablet.Alias, err)
}
return
}
if !masterIsDead {
if !status.SlaveRunning() {
err = fmt.Errorf("slave %v is not replicating (Slave_IO or Slave_SQL not running), can't complete reparent in time", tablet.Alias)
wr.logger.Errorf(" %v", err)
return
}
var dur = time.Second * time.Duration(status.SecondsBehindMaster)
if dur > waitSlaveTimeout {
err = fmt.Errorf("slave is too far behind to complete reparent in time (%v>%v), either increase timeout using 'vtctl ReparentShard -wait_slave_timeout=XXX ...' or scrap tablet %v", dur, waitSlaveTimeout, tablet.Alias)
wr.logger.Errorf(" %v", err)
return
}
wr.logger.Infof(" slave is %v behind master (<%v), reparent should work for %v", dur, waitSlaveTimeout, tablet.Alias)
}
}(tablet)
}
wg.Wait()
return lastError
}
// Check all the tablets to see if we can proceed with reparenting.
// masterPosition is supplied from the demoted master if we are doing
// this gracefully.
func (wr *Wrangler) checkSlaveConsistency(ctx context.Context, tabletMap map[uint32]*topo.TabletInfo, masterPosition myproto.ReplicationPosition, waitSlaveTimeout time.Duration) error {
wr.logger.Infof("checkSlaveConsistency %v %#v", topotools.MapKeys(tabletMap), masterPosition)
// FIXME(msolomon) Something still feels clumsy here and I can't put my finger on it.
calls := make(chan *rpcContext, len(tabletMap))
f := func(ti *topo.TabletInfo) {
rpcCtx := &rpcContext{tablet: ti}
defer func() {
calls <- rpcCtx
}()
if !masterPosition.IsZero() {
// If the master position is known, do our best to wait for replication to catch up.
status, err := wr.tmc.WaitSlavePosition(ctx, ti, masterPosition, waitSlaveTimeout)
if err != nil {
rpcCtx.err = err
return
}
rpcCtx.status = status
} else {
// If the master is down, just get the slave status.
status, err := wr.tmc.SlaveStatus(ctx, ti)
if err != nil {
rpcCtx.err = err
return
}
rpcCtx.status = status
}
}
for _, tablet := range tabletMap {
// Pass loop variable explicitly so we don't have a concurrency issue.
go f(tablet)
}
// map positions to tablets
positionMap := make(map[string][]uint32)
for i := 0; i < len(tabletMap); i++ {
rpcCtx := <-calls
mapKey := "unavailable-tablet-error"
if rpcCtx.err == nil {
mapKey = rpcCtx.status.Position.String()
}
if _, ok := positionMap[mapKey]; !ok {
positionMap[mapKey] = make([]uint32, 0, 32)
}
positionMap[mapKey] = append(positionMap[mapKey], rpcCtx.tablet.Alias.Uid)
}
if len(positionMap) == 1 {
// great, everyone agrees
// demotedMasterReplicationState is nil if demotion failed
if !masterPosition.IsZero() {
demotedMapKey := masterPosition.String()
if _, ok := positionMap[demotedMapKey]; !ok {
for slaveMapKey := range positionMap {
return fmt.Errorf("slave position doesn't match demoted master: %v != %v", demotedMapKey,
slaveMapKey)
}
}
}
} else {
// FIXME(msolomon) in the event of a crash, do you pick replica that is
// furthest along or do you promote the majority? data loss vs availability
// sounds like you pick the latest group and reclone.
items := make([]string, 0, 32)
for slaveMapKey, uids := range positionMap {
tabletPaths := make([]string, len(uids))
for i, uid := range uids {
tabletPaths[i] = tabletMap[uid].Alias.String()
}
items = append(items, fmt.Sprintf(" %v\n %v", slaveMapKey, strings.Join(tabletPaths, "\n ")))
}
sort.Strings(items)
return fmt.Errorf("inconsistent slaves, mark some offline with vtctl ScrapTablet\n%v", strings.Join(items, "\n"))
}
return nil
}
// Shut off all replication.
func (wr *Wrangler) stopSlaves(ctx context.Context, tabletMap map[topo.TabletAlias]*topo.TabletInfo) error {
errs := make(chan error, len(tabletMap))
f := func(ti *topo.TabletInfo) {
err := wr.tmc.StopSlave(ctx, ti)
if err != nil {
wr.logger.Infof("StopSlave failed: %v", err)
}
errs <- err
}
for _, tablet := range tabletMap {
// Pass loop variable explicitly so we don't have a concurrency issue.
go f(tablet)
}
// wait for responses
for i := 0; i < len(tabletMap); i++ {
if err := <-errs; err != nil {
return err
}
}
return nil
}
// tabletReplicationStatuses returns the ReplicationStatus of each tablet in
// tablets. It handles masters and slaves, but it's up to the caller to
// guarantee all tablets are in the same shard.
func (wr *Wrangler) tabletReplicationStatuses(ctx context.Context, tablets []*topo.TabletInfo) ([]*myproto.ReplicationStatus, error) {
wr.logger.Infof("tabletReplicationStatuses: %v", tablets)
calls := make([]*rpcContext, len(tablets))
wg := sync.WaitGroup{}
f := func(idx int) {
defer wg.Done()
ti := tablets[idx]
rpcCtx := &rpcContext{tablet: ti}
calls[idx] = rpcCtx
if ti.Type == topo.TYPE_MASTER {
pos, err := wr.tmc.MasterPosition(ctx, ti)
rpcCtx.err = err
if err == nil {
rpcCtx.status = &myproto.ReplicationStatus{Position: pos}
}
} else if ti.IsSlaveType() {
rpcCtx.status, rpcCtx.err = wr.tmc.SlaveStatus(ctx, ti)
}
}
for i, tablet := range tablets {
// Don't scan tablets that won't return something useful. Otherwise, you'll
// end up waiting for a timeout.
if tablet.Type == topo.TYPE_MASTER || tablet.IsSlaveType() {
wg.Add(1)
go f(i)
} else {
wr.logger.Infof("tabletReplicationPositions: skipping tablet %v type %v", tablet.Alias, tablet.Type)
}
}
wg.Wait()
someErrors := false
stats := make([]*myproto.ReplicationStatus, len(tablets))
for i, rpcCtx := range calls {
if rpcCtx == nil {
continue
}
if rpcCtx.err != nil {
wr.logger.Warningf("could not get replication status for tablet %v %v", rpcCtx.tablet.Alias, rpcCtx.err)
someErrors = true
} else {
stats[i] = rpcCtx.status
}
}
if someErrors {
return stats, fmt.Errorf("partial position map, some errors")
}
return stats, nil
}
func (wr *Wrangler) demoteMaster(ctx context.Context, ti *topo.TabletInfo) (myproto.ReplicationPosition, error) {
wr.logger.Infof("demote master %v", ti.Alias)
return wr.tmc.DemoteMaster(ctx, ti)
}
func (wr *Wrangler) promoteSlave(ctx context.Context, ti *topo.TabletInfo) (rsd *actionnode.RestartSlaveData, err error) {
wr.logger.Infof("promote slave %v", ti.Alias)
return wr.tmc.PromoteSlave(ctx, ti)
}
func (wr *Wrangler) restartSlaves(ctx context.Context, slaveTabletMap map[topo.TabletAlias]*topo.TabletInfo, rsd *actionnode.RestartSlaveData) (majorityRestart bool, err error) {
wg := new(sync.WaitGroup)
slaves := topotools.CopyMapValues(slaveTabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo)
errs := make([]error, len(slaveTabletMap))
f := func(i int) {
errs[i] = wr.restartSlave(ctx, slaves[i], rsd)
if errs[i] != nil {
// FIXME(msolomon) Don't bail early, just mark this phase as
// failed. We might decide to proceed if enough of these
// succeed.
//
// FIXME(msolomon) This is a somewhat delicate retry - have to
// figure out why it failed on the tablet end. This could lead
// to a nasty case of having to recompute where to start
// replication. Practically speaking, that chance is pretty low.
wr.logger.Warningf("restart slave failed: %v %v", slaves[i].Alias, errs[i])
}
wg.Done()
}
for i := range slaves {
wg.Add(1)
go f(i)
}
wg.Wait()
errCount := 0
badTablets := make([]string, 0, 16)
for i, err := range errs {
if err != nil {
errCount++
badTablets = append(badTablets, slaves[i].Alias.String())
}
}
// Phrase the question with multiplication so we don't get caught by int
// division rounding.
majorityRestart = errCount*2 < len(slaveTabletMap)
if errCount > 0 {
err = fmt.Errorf("restart slave failed on some tablets (%v): %v", errCount, strings.Join(badTablets, ", "))
}
return
}
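
A standalone illustration of why the majority test above is phrased with multiplication rather than integer division; the counts are made up.

	package main

	import "fmt"

	func main() {
		// Made-up counts: 3 slaves, 1 restart failure, so 2 of 3 restarted (a majority).
		total, errCount := 3, 1
		fmt.Println(errCount*2 < total) // true: the multiplication form sees the majority
		fmt.Println(errCount < total/2) // false: integer division rounds 3/2 down to 1
	}
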
func (wr *Wrangler) restartSlave(ctx context.Context, ti *topo.TabletInfo, rsd *actionnode.RestartSlaveData) (err error) {
wr.logger.Infof("restart slave %v", ti.Alias)
return wr.tmc.RestartSlave(ctx, ti, rsd)
}
func (wr *Wrangler) checkMasterElect(ctx context.Context, ti *topo.TabletInfo) error {
// Check the master-elect is fit for duty - try to ping it.
// if the server was already serving live traffic, it's probably good
if ti.IsInServingGraph() {
return nil
}
return wr.tmc.Ping(ctx, ti)
}
func (wr *Wrangler) finishReparent(ctx context.Context, si *topo.ShardInfo, masterElect *topo.TabletInfo, majorityRestart, leaveMasterReadOnly bool) error {
// If the majority of slaves restarted, move ahead.
if majorityRestart {
if leaveMasterReadOnly {
wr.logger.Warningf("leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
} else {
wr.logger.Infof("marking master-elect read-write %v", masterElect.Alias)
if err := wr.tmc.SetReadWrite(ctx, masterElect); err != nil {
wr.logger.Warningf("master master-elect read-write failed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
}
}
} else {
wr.logger.Warningf("minority reparent, manual fixes are needed, leaving master-elect read-only, change with: vtctl SetReadWrite %v", masterElect.Alias)
}
// save the new master in the shard info
si.MasterAlias = masterElect.Alias
if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
wr.logger.Errorf("Failed to save new master into shard: %v", err)
return err
}
// We rebuild all the cells, as we may have taken tablets in and
// out of the graph.
wr.logger.Infof("rebuilding shard serving graph data")
_, err := wr.RebuildShardGraph(ctx, masterElect.Keyspace, masterElect.Shard, nil)
return err
}
func (wr *Wrangler) breakReplication(ctx context.Context, slaveMap map[topo.TabletAlias]*topo.TabletInfo, masterElect *topo.TabletInfo) error {
// We are forcing a reparenting. Make sure that all slaves stop so
// no data is accidentally replicated through before we call RestartSlave.
wr.logger.Infof("stop slaves %v", masterElect.Alias)
err := wr.stopSlaves(ctx, slaveMap)
if err != nil {
return err
}
// Force slaves to break, just in case they were not advertised in
// the replication graph.
wr.logger.Infof("break slaves %v", masterElect.Alias)
return wr.tmc.BreakSlaves(ctx, masterElect)
}
func (wr *Wrangler) restartableTabletMap(slaves map[topo.TabletAlias]*topo.TabletInfo) map[uint32]*topo.TabletInfo {
// Under normal circumstances, prune out lag as not restartable.
// These types are explicitly excluded from reparenting since you
// will just wait forever for them to catch up. A possible
// improvement is waiting for the io thread to reach the same
// position as the sql thread on a normal slave.
tabletMap := make(map[uint32]*topo.TabletInfo)
for _, ti := range slaves {
if ti.Type != topo.TYPE_LAG {
tabletMap[ti.Alias.Uid] = ti
} else {
wr.logger.Infof("skipping reparent action for tablet %v %v", ti.Type, ti.Alias)
}
}
return tabletMap
}

View file

@@ -1,110 +0,0 @@
package wrangler
import (
"fmt"
"time"
"github.com/youtube/vitess/go/event"
myproto "github.com/youtube/vitess/go/vt/mysqlctl/proto"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"github.com/youtube/vitess/go/vt/topotools/events"
"golang.org/x/net/context"
)
// reparentShardBrutal executes a brutal reparent.
//
// Assume the master is dead and not coming back. Just push your way
// forward. Force means we are reparenting to the same master
// (assuming the data has been externally synched).
//
// The ev parameter is an event struct prefilled with information that the
// caller has on hand, which would be expensive for us to re-query.
func (wr *Wrangler) reparentShardBrutal(ctx context.Context, ev *events.Reparent, si *topo.ShardInfo, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly, force bool, waitSlaveTimeout time.Duration) (err error) {
event.DispatchUpdate(ev, "starting brutal")
defer func() {
if err != nil {
event.DispatchUpdate(ev, "failed: "+err.Error())
}
}()
wr.logger.Infof("Skipping ValidateShard - not a graceful situation")
if _, ok := slaveTabletMap[masterElectTablet.Alias]; !ok && !force {
return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias, si.Keyspace(), si.ShardName(), topotools.MapKeys(slaveTabletMap))
}
// Check the master-elect and slaves are in good shape when the action
// has not been forced.
if !force {
// Make sure all tablets have the right parent and reasonable positions.
event.DispatchUpdate(ev, "checking slave replication positions")
if err := wr.checkSlaveReplication(ctx, slaveTabletMap, topo.NO_TABLET, waitSlaveTimeout); err != nil {
return err
}
// Check the master-elect is fit for duty - call out for hardware checks.
event.DispatchUpdate(ev, "checking that new master is ready to serve")
if err := wr.checkMasterElect(ctx, masterElectTablet); err != nil {
return err
}
event.DispatchUpdate(ev, "checking slave consistency")
wr.logger.Infof("check slaves %v/%v", masterElectTablet.Keyspace, masterElectTablet.Shard)
restartableSlaveTabletMap := wr.restartableTabletMap(slaveTabletMap)
err = wr.checkSlaveConsistency(ctx, restartableSlaveTabletMap, myproto.ReplicationPosition{}, waitSlaveTimeout)
if err != nil {
return err
}
} else {
event.DispatchUpdate(ev, "stopping slave replication")
wr.logger.Infof("forcing reparent to same master %v", masterElectTablet.Alias)
err := wr.breakReplication(ctx, slaveTabletMap, masterElectTablet)
if err != nil {
return err
}
}
event.DispatchUpdate(ev, "promoting new master")
rsd, err := wr.promoteSlave(ctx, masterElectTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master-elect is dead.
// We need to classify certain errors as temporary and retry.
return fmt.Errorf("promote slave failed: %v %v", err, masterElectTablet.Alias)
}
// Once the slave is promoted, remove it from our maps
delete(slaveTabletMap, masterElectTablet.Alias)
delete(masterTabletMap, masterElectTablet.Alias)
event.DispatchUpdate(ev, "restarting slaves")
majorityRestart, restartSlaveErr := wr.restartSlaves(ctx, slaveTabletMap, rsd)
if !force {
for _, failedMaster := range masterTabletMap {
event.DispatchUpdate(ev, "scrapping old master")
wr.logger.Infof("scrap dead master %v", failedMaster.Alias)
// The master is dead so execute the action locally instead of
// enqueuing the scrap action for an arbitrary amount of time.
if scrapErr := topotools.Scrap(ctx, wr.ts, failedMaster.Alias, false); scrapErr != nil {
wr.logger.Warningf("scrapping failed master failed: %v", scrapErr)
}
}
}
event.DispatchUpdate(ev, "rebuilding shard serving graph")
err = wr.finishReparent(ctx, si, masterElectTablet, majorityRestart, leaveMasterReadOnly)
if err != nil {
return err
}
event.DispatchUpdate(ev, "finished")
if restartSlaveErr != nil {
// This is more of a warning at this point.
return restartSlaveErr
}
return nil
}

View file

@@ -1,131 +0,0 @@
// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package wrangler
import (
"fmt"
"strings"
"time"
"github.com/youtube/vitess/go/event"
"github.com/youtube/vitess/go/vt/topo"
"github.com/youtube/vitess/go/vt/topotools"
"github.com/youtube/vitess/go/vt/topotools/events"
"golang.org/x/net/context"
)
// reparentShardGraceful executes a graceful reparent.
// The ev parameter is an event struct prefilled with information that the
// caller has on hand, which would be expensive for us to re-query.
func (wr *Wrangler) reparentShardGraceful(ctx context.Context, ev *events.Reparent, si *topo.ShardInfo, slaveTabletMap, masterTabletMap map[topo.TabletAlias]*topo.TabletInfo, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly bool, waitSlaveTimeout time.Duration) (err error) {
event.DispatchUpdate(ev, "starting graceful")
defer func() {
if err != nil {
event.DispatchUpdate(ev, "failed: "+err.Error())
}
}()
// Validate a bunch of assumptions we make about the replication graph.
if len(masterTabletMap) != 1 {
aliases := make([]string, 0, len(masterTabletMap))
for _, v := range masterTabletMap {
aliases = append(aliases, v.String())
}
return fmt.Errorf("I have 0 or multiple masters / scrapped tablets in this shard replication graph, please scrap the non-master ones: %v", strings.Join(aliases, " "))
}
var masterTablet *topo.TabletInfo
for _, v := range masterTabletMap {
masterTablet = v
}
if masterTablet.Type != topo.TYPE_MASTER {
return fmt.Errorf("master tablet should not be type: %v %v", masterTablet.Type, masterTablet.Alias)
}
if masterTablet.Alias.Uid == masterElectTablet.Alias.Uid {
return fmt.Errorf("master tablet should not match master elect - this must be forced: %v", masterTablet.Alias)
}
if _, ok := slaveTabletMap[masterElectTablet.Alias]; !ok {
return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias, masterTablet.Keyspace, masterTablet.Shard, topotools.MapKeys(slaveTabletMap))
}
if err := wr.ValidateShard(ctx, masterTablet.Keyspace, masterTablet.Shard, true); err != nil {
return fmt.Errorf("ValidateShard verification failed: %v, if the master is dead, run: vtctl ScrapTablet -force %v", err, masterTablet.Alias)
}
// Make sure all tablets have the right parent and reasonable positions.
event.DispatchUpdate(ev, "checking slave replication positions")
err = wr.checkSlaveReplication(ctx, slaveTabletMap, masterTablet.Alias.Uid, waitSlaveTimeout)
if err != nil {
return err
}
// Check the master-elect is fit for duty - call out for hardware checks.
event.DispatchUpdate(ev, "checking that new master is ready to serve")
err = wr.checkMasterElect(ctx, masterElectTablet)
if err != nil {
return err
}
event.DispatchUpdate(ev, "demoting old master")
masterPosition, err := wr.demoteMaster(ctx, masterTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master is dead and we
// need to take steps. We could either pop a prompt, or make
// retrying the action painless.
return fmt.Errorf("demote master failed: %v, if the master is dead, run: vtctl -force ScrapTablet %v", err, masterTablet.Alias)
}
event.DispatchUpdate(ev, "checking slave consistency")
wr.logger.Infof("check slaves %v/%v", masterTablet.Keyspace, masterTablet.Shard)
restartableSlaveTabletMap := wr.restartableTabletMap(slaveTabletMap)
err = wr.checkSlaveConsistency(ctx, restartableSlaveTabletMap, masterPosition, waitSlaveTimeout)
if err != nil {
return fmt.Errorf("check slave consistency failed %v, demoted master is still read only, run: vtctl SetReadWrite %v", err, masterTablet.Alias)
}
event.DispatchUpdate(ev, "promoting new master")
rsd, err := wr.promoteSlave(ctx, masterElectTablet)
if err != nil {
// FIXME(msolomon) This suggests that the master-elect is dead.
// We need to classify certain errors as temporary and retry.
return fmt.Errorf("promote slave failed: %v, demoted master is still read only: vtctl SetReadWrite %v", err, masterTablet.Alias)
}
// Once the slave is promoted, remove it from our map
delete(slaveTabletMap, masterElectTablet.Alias)
event.DispatchUpdate(ev, "restarting slaves")
majorityRestart, restartSlaveErr := wr.restartSlaves(ctx, slaveTabletMap, rsd)
// For now, scrap the old master regardless of how many
// slaves restarted.
//
// FIXME(msolomon) We could reintroduce it and reparent it and use
// it as new replica.
event.DispatchUpdate(ev, "scrapping old master")
wr.logger.Infof("scrap demoted master %v", masterTablet.Alias)
if scrapErr := wr.tmc.Scrap(ctx, masterTablet); scrapErr != nil {
// The sub action is non-critical, so just warn.
wr.logger.Warningf("scrap demoted master failed: %v", scrapErr)
}
event.DispatchUpdate(ev, "rebuilding shard serving graph")
err = wr.finishReparent(ctx, si, masterElectTablet, majorityRestart, leaveMasterReadOnly)
if err != nil {
return err
}
event.DispatchUpdate(ev, "finished")
if restartSlaveErr != nil {
// This is more of a warning at this point.
return restartSlaveErr
}
return nil
}