From c4d1964e947d365de1cb132b205e65bf1ad2a10d Mon Sep 17 00:00:00 2001
From: Alain Jobart
Date: Wed, 6 May 2015 16:24:09 -0700
Subject: [PATCH] For PlannedReparentShard, restart replication on the old
 master after the reparent is done.

Might as well try it if possible.
---
 doc/Reparenting.md                                   |  3 ++-
 go/vt/mysqlctl/mysql_daemon.go                       |  4 +++-
 go/vt/wrangler/reparent.go                           |  8 ++++++++
 .../wrangler/testlib/planned_reparent_shard_test.go  | 12 ++++++++++++
 test/schema.py                                       |  4 ++--
 5 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/doc/Reparenting.md b/doc/Reparenting.md
index 0be0a427a9..4ad91e5c2c 100644
--- a/doc/Reparenting.md
+++ b/doc/Reparenting.md
@@ -80,11 +80,12 @@ The actions performed are:
     wait for the entry in the test table. (if a slave wasn't
     replicating, we don't change its state and don't start replication
     after reparent)
+  - additionally, on the old master, we start replication, so it catches up.
 
 The old master is left as 'spare' in this scenario. If health
 checking is enabled on that tablet (using target\_tablet\_type
 parameter for vttablet), the server will most likely rejoin the
 cluster as a
-replica.
+replica on the next health check.
 
 ### Emergency Reparent: vtctl EmergencyReparentShard
diff --git a/go/vt/mysqlctl/mysql_daemon.go b/go/vt/mysqlctl/mysql_daemon.go
index ba3795179a..e11015b7d6 100644
--- a/go/vt/mysqlctl/mysql_daemon.go
+++ b/go/vt/mysqlctl/mysql_daemon.go
@@ -71,7 +71,9 @@ type FakeMysqlDaemon struct {
 	// return an error.
 	MysqlPort int
 
-	// Replicating is updated when calling StopSlave
+	// Replicating is updated when calling StartSlave / StopSlave
+	// (it is not used at all when calling SlaveStatus; it is the
+	// test owner's responsibility to keep the two consistent)
 	Replicating bool
 
 	// CurrentSlaveStatus is returned by SlaveStatus
diff --git a/go/vt/wrangler/reparent.go b/go/vt/wrangler/reparent.go
index e22bc59f93..1137597749 100644
--- a/go/vt/wrangler/reparent.go
+++ b/go/vt/wrangler/reparent.go
@@ -403,6 +403,14 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R
 			wr.logger.Infof("setting new master on slave %v", alias)
 			if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now); err != nil {
 				rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
+				return
+			}
+
+			// also restart replication on the old master
+			if alias == oldMasterTabletInfo.Alias {
+				if err := wr.TabletManagerClient().StartSlave(ctx, tabletInfo); err != nil {
+					rec.RecordError(fmt.Errorf("old master %v StartSlave failed: %v", alias, err))
+				}
 			}
 		}(alias, tabletInfo)
 	}
diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go
index 5800363cde..c4d1b87dbf 100644
--- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go
+++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go
@@ -33,6 +33,7 @@ func TestPlannedReparentShard(t *testing.T) {
 
 	// new master
 	newMaster.FakeMysqlDaemon.ReadOnly = true
+	newMaster.FakeMysqlDaemon.Replicating = true
 	newMaster.FakeMysqlDaemon.WaitMasterPosition = myproto.ReplicationPosition{
 		GTIDSet: myproto.MariadbGTID{
 			Domain: 7,
@@ -57,6 +58,7 @@ func TestPlannedReparentShard(t *testing.T) {
 
 	// old master
 	oldMaster.FakeMysqlDaemon.ReadOnly = false
+	oldMaster.FakeMysqlDaemon.Replicating = false
 	oldMaster.FakeMysqlDaemon.DemoteMasterPosition = newMaster.FakeMysqlDaemon.WaitMasterPosition
 	oldMaster.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"])
 	oldMaster.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
@@ -69,6 +71,7 @@ func TestPlannedReparentShard(t *testing.T) {
 
 	// good slave 1 is replicating
 	goodSlave1.FakeMysqlDaemon.ReadOnly = true
+	goodSlave1.FakeMysqlDaemon.Replicating = true
 	goodSlave1.FakeMysqlDaemon.CurrentSlaveStatus = &myproto.ReplicationStatus{
 		SlaveIORunning:  true,
 		SlaveSQLRunning: true,
@@ -85,6 +88,7 @@ func TestPlannedReparentShard(t *testing.T) {
 
 	// good slave 2 is not replicating
 	goodSlave2.FakeMysqlDaemon.ReadOnly = true
+	goodSlave2.FakeMysqlDaemon.Replicating = false
 	goodSlave2.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"])
 	goodSlave2.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
 	goodSlave2.StartActionLoop(t, wr)
@@ -127,4 +131,12 @@ func TestPlannedReparentShard(t *testing.T) {
 		t.Errorf("oldMaster...QueryServiceEnabled set")
 	}
 
+	// verify the old master was told to start replicating (and not
+	// the slave that wasn't replicating in the first place)
+	if !oldMaster.FakeMysqlDaemon.Replicating {
+		t.Errorf("oldMaster.FakeMysqlDaemon.Replicating not set")
+	}
+	if goodSlave2.FakeMysqlDaemon.Replicating {
+		t.Errorf("goodSlave2.FakeMysqlDaemon.Replicating set")
+	}
 }
diff --git a/test/schema.py b/test/schema.py
index fc999b959c..6f4f846b89 100755
--- a/test/schema.py
+++ b/test/schema.py
@@ -243,7 +243,7 @@ class TestSchema(unittest.TestCase):
                      auto_log=True)
 
     # check all expected hosts have the change
-    self._check_tables(shard_0_master, 1)  # was stuck a long time ago as scrap
+    self._check_tables(shard_0_master, 2)  # was stuck a long time ago as scrap
     self._check_tables(shard_0_replica1, 3)  # current master
     self._check_tables(shard_0_replica2, 3)
     self._check_tables(shard_0_rdonly, 3)
@@ -262,7 +262,7 @@ class TestSchema(unittest.TestCase):
     # check all expected hosts have the change:
     # - master won't have it as it's a complex change
     # - backup won't have it as IsReplicatingType is false
-    self._check_tables(shard_0_master, 1)  # was stuck a long time ago as scrap
+    self._check_tables(shard_0_master, 2)  # was stuck a long time ago as scrap
     self._check_tables(shard_0_replica1, 3)  # current master
     self._check_tables(shard_0_replica2, 4)
     self._check_tables(shard_0_rdonly, 4)
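
Reviewer note: the sketch below is not part of the patch. It is a minimal, self-contained
illustration of the per-slave goroutine pattern that the reparent.go hunk modifies: every
slave gets SetMaster, the old master additionally gets StartSlave so it catches up on the
new master, and the added bare 'return' skips that StartSlave when SetMaster already
failed. The tabletClient interface, errorRecorder, reparentSlaves, and fakeClient names
and signatures are illustrative stand-ins, not the actual wrangler types; only the
control flow mirrors the change.

package main

import (
	"fmt"
	"sync"
)

// tabletClient is a hypothetical stand-in for the subset of the tablet
// manager client used by the reparent flow.
type tabletClient interface {
	SetMaster(alias string) error
	StartSlave(alias string) error
}

// errorRecorder collects errors from concurrent goroutines, mimicking
// the role of rec.RecordError in the patch.
type errorRecorder struct {
	mu   sync.Mutex
	errs []error
}

func (r *errorRecorder) RecordError(err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.errs = append(r.errs, err)
}

// reparentSlaves mirrors the shape of the loop in plannedReparentShardLocked:
// one goroutine per slave, errors recorded concurrently, and the old master
// is also told to start replicating once its new master is set.
func reparentSlaves(tmc tabletClient, slaves []string, oldMaster string) []error {
	rec := &errorRecorder{}
	var wg sync.WaitGroup
	for _, alias := range slaves {
		wg.Add(1)
		go func(alias string) {
			defer wg.Done()
			if err := tmc.SetMaster(alias); err != nil {
				rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
				// corresponds to the added 'return': don't try to restart
				// replication on a tablet whose SetMaster already failed
				return
			}
			// also restart replication, but only on the old master
			if alias == oldMaster {
				if err := tmc.StartSlave(alias); err != nil {
					rec.RecordError(fmt.Errorf("old master %v StartSlave failed: %v", alias, err))
				}
			}
		}(alias)
	}
	wg.Wait()
	return rec.errs
}

// fakeClient is a trivial no-op implementation, only for the usage example.
type fakeClient struct{}

func (fakeClient) SetMaster(alias string) error  { return nil }
func (fakeClient) StartSlave(alias string) error { return nil }

func main() {
	errs := reparentSlaves(fakeClient{},
		[]string{"cell-0000000100", "cell-0000000101"}, "cell-0000000100")
	fmt.Println("errors:", errs)
}

Because errors are recorded rather than returned immediately, a failure on one tablet
does not stop the other tablets from being reparented; the early return only affects
the tablet whose SetMaster call failed.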