Merge pull request #673 from youtube/replication

Replication
Alain Jobart 2015-05-06 17:09:05 -07:00
Parent e0e84b3bb8 1e312716dc
Commit fd74fecf3f
5 changed files with 27 additions and 4 deletions

View file

@ -80,11 +80,12 @@ The actions performed are:
wait for the entry in the test table. (if a slave wasn't
replicating, we don't change its state and don't start replication
after reparent)
- additionally, on the old master, we start replication, so it catches up.
The old master is left as 'spare' in this scenario. If health checking
is enabled on that tablet (using target\_tablet\_type parameter for
vttablet), the server will most likely rejoin the cluster as a
replica.
replica on the next health check.
### Emergency Reparent: vtctl EmergencyReparentShard
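For reference, here is a minimal, self-contained sketch of the per-tablet step the planned-reparent paragraph above describes. The tabletClient interface and the string-typed aliases are illustrative stand-ins, not the real TabletManagerClient API; the actual change is in the go/vt/wrangler diff further down.

package reparentsketch

import (
	"context"
	"fmt"
)

// tabletClient stands in for the two TabletManagerClient calls relevant here.
type tabletClient interface {
	SetMaster(ctx context.Context, tabletAlias, masterAlias string) error
	StartSlave(ctx context.Context, tabletAlias string) error
}

// reparentOne repoints one tablet at the new master. For the old master it
// additionally starts replication, so it catches up and can later rejoin the
// cluster as a replica via health checking.
func reparentOne(ctx context.Context, tc tabletClient, tabletAlias, masterAlias string, isOldMaster bool) error {
	if err := tc.SetMaster(ctx, tabletAlias, masterAlias); err != nil {
		return fmt.Errorf("tablet %v SetMaster failed: %v", tabletAlias, err)
	}
	if isOldMaster {
		if err := tc.StartSlave(ctx, tabletAlias); err != nil {
			return fmt.Errorf("old master %v StartSlave failed: %v", tabletAlias, err)
		}
	}
	return nil
}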

View file

@ -71,7 +71,9 @@ type FakeMysqlDaemon struct {
// return an error.
MysqlPort int
// Replicating is updated when calling StopSlave
// Replicating is updated when calling StartSlave / StopSlave
// (it is not used at all when calling SlaveStatus, it is the
// test owner's responsibility to have these two match)
Replicating bool
// CurrentSlaveStatus is returned by SlaveStatus
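Below is a minimal sketch of the contract this comment describes, using illustrative names rather than the real FakeMysqlDaemon API: StartSlave and StopSlave flip Replicating, while SlaveStatus simply returns whatever status the test configured, so the test has to keep the two consistent itself.

package fakereplication

type slaveStatus struct {
	SlaveIORunning  bool
	SlaveSQLRunning bool
}

type fakeDaemon struct {
	Replicating bool         // flipped by StartSlave / StopSlave
	Current     *slaveStatus // returned verbatim by SlaveStatus
}

func (d *fakeDaemon) StartSlave() error { d.Replicating = true; return nil }

func (d *fakeDaemon) StopSlave() error { d.Replicating = false; return nil }

// SlaveStatus does not consult Replicating at all, mirroring the comment above.
func (d *fakeDaemon) SlaveStatus() (*slaveStatus, error) { return d.Current, nil }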

View file

@ -403,6 +403,14 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R
wr.logger.Infof("setting new master on slave %v", alias)
if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now); err != nil {
rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
return
}
// also restart replication on old master
if alias == oldMasterTabletInfo.Alias {
if err := wr.TabletManagerClient().StartSlave(ctx, tabletInfo); err != nil {
rec.RecordError(fmt.Errorf("old master %v StartSlave failed: %v", alias, err))
}
}
}(alias, tabletInfo)
}
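The loop above repoints every slave in its own goroutine and records failures instead of returning them to the caller, so one bad tablet does not block the others; the return after a failed SetMaster only prevents that goroutine from going on to StartSlave. A self-contained sketch of this fan-out-and-collect pattern, with errRecorder as an illustrative stand-in for the error recorder (rec) used by the wrangler:

package fanout

import "sync"

// errRecorder collects errors from concurrent goroutines.
type errRecorder struct {
	mu   sync.Mutex
	errs []error
}

func (r *errRecorder) RecordError(err error) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.errs = append(r.errs, err)
}

// repointAll runs repoint for every alias concurrently and returns all
// recorded errors once every goroutine has finished.
func repointAll(aliases []string, repoint func(alias string) error) []error {
	rec := &errRecorder{}
	var wg sync.WaitGroup
	for _, alias := range aliases {
		wg.Add(1)
		go func(alias string) {
			defer wg.Done()
			if err := repoint(alias); err != nil {
				rec.RecordError(err)
			}
		}(alias)
	}
	wg.Wait()
	return rec.errs
}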

View file

@ -33,6 +33,7 @@ func TestPlannedReparentShard(t *testing.T) {
// new master
newMaster.FakeMysqlDaemon.ReadOnly = true
newMaster.FakeMysqlDaemon.Replicating = true
newMaster.FakeMysqlDaemon.WaitMasterPosition = myproto.ReplicationPosition{
GTIDSet: myproto.MariadbGTID{
Domain: 7,
@ -57,6 +58,7 @@ func TestPlannedReparentShard(t *testing.T) {
// old master
oldMaster.FakeMysqlDaemon.ReadOnly = false
oldMaster.FakeMysqlDaemon.Replicating = false
oldMaster.FakeMysqlDaemon.DemoteMasterPosition = newMaster.FakeMysqlDaemon.WaitMasterPosition
oldMaster.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"])
oldMaster.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
@ -69,6 +71,7 @@ func TestPlannedReparentShard(t *testing.T) {
// good slave 1 is replicating
goodSlave1.FakeMysqlDaemon.ReadOnly = true
goodSlave1.FakeMysqlDaemon.Replicating = true
goodSlave1.FakeMysqlDaemon.CurrentSlaveStatus = &myproto.ReplicationStatus{
SlaveIORunning: true,
SlaveSQLRunning: true,
@ -85,6 +88,7 @@ func TestPlannedReparentShard(t *testing.T) {
// good slave 2 is not replicating
goodSlave2.FakeMysqlDaemon.ReadOnly = true
goodSlave2.FakeMysqlDaemon.Replicating = false
goodSlave2.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"])
goodSlave2.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
goodSlave2.StartActionLoop(t, wr)
@ -127,4 +131,12 @@ func TestPlannedReparentShard(t *testing.T) {
t.Errorf("oldMaster...QueryServiceEnabled set")
}
// verify the old master was told to start replicating (and not
// the slave that wasn't replicating in the first place)
if !oldMaster.FakeMysqlDaemon.Replicating {
t.Errorf("oldMaster.FakeMysqlDaemon.Replicating not set")
}
if goodSlave2.FakeMysqlDaemon.Replicating {
t.Errorf("goodSlave2.FakeMysqlDaemon.Replicating set")
}
}

View file

@ -243,7 +243,7 @@ class TestSchema(unittest.TestCase):
auto_log=True)
# check all expected hosts have the change
self._check_tables(shard_0_master, 1) # was stuck a long time ago as scrap
self._check_tables(shard_0_master, 2) # was stuck a long time ago as scrap
self._check_tables(shard_0_replica1, 3) # current master
self._check_tables(shard_0_replica2, 3)
self._check_tables(shard_0_rdonly, 3)
@ -262,7 +262,7 @@ class TestSchema(unittest.TestCase):
# check all expected hosts have the change:
# - master won't have it as it's a complex change
# - backup won't have it as IsReplicatingType is false
self._check_tables(shard_0_master, 1) # was stuck a long time ago as scrap
self._check_tables(shard_0_master, 2) # was stuck a long time ago as scrap
self._check_tables(shard_0_replica1, 3) # current master
self._check_tables(shard_0_replica2, 4)
self._check_tables(shard_0_rdonly, 4)