Now reparent doesn't interfere with slave replication status.

If the slaves were replicating, we stop replication, set the master,
and start replication again; if they were not, we just set the master.
A side effect is that ReparentTablet may not start replication if it
was stopped, but it's better that way.
This commit is contained in:
Alain Jobart 2015-05-06 08:52:57 -07:00
Родитель 9656807a14
Коммит 00d5262fa8
7 изменённых файлов: 104 добавлений и 26 удалений

Просмотреть файл

@ -21,8 +21,9 @@ This file handles the differences between flavors of mysql.
// MysqlFlavor is the abstract interface for a flavor.
type MysqlFlavor interface {
// VersionMatch returns true if the version string (from SELECT VERSION())
// represents a server that this flavor knows how to talk to.
// VersionMatch returns true if the version string (from
// SELECT VERSION()) represents a server that this flavor
// knows how to talk to.
VersionMatch(version string) bool
// MasterPosition returns the ReplicationPosition of a master.
@ -39,40 +40,48 @@ type MysqlFlavor interface {
// a slave into a master.
PromoteSlaveCommands() []string
// StartReplicationCommands returns the commands to start replicating from
// a given master and position as specified in a ReplicationStatus.
// StartReplicationCommands returns the commands to start
// replicating from a given master and position as specified
// in a ReplicationStatus. It should start replication.
StartReplicationCommands(params *sqldb.ConnParams, status *proto.ReplicationStatus) ([]string, error)
// SetMasterCommands returns the commands to use the provided master
// as the new master (without changing any GTID position).
// It is guaranteed to be called with replication stopped.
// It should not start or stop replication.
SetMasterCommands(params *sqldb.ConnParams, masterHost string, masterPort int, masterConnectRetry int) ([]string, error)
// ParseGTID parses a GTID in the canonical format of this MySQL flavor into
// a proto.GTID interface value.
// ParseGTID parses a GTID in the canonical format of this
// MySQL flavor into a proto.GTID interface value.
ParseGTID(string) (proto.GTID, error)
// ParseReplicationPosition parses a replication position in the canonical
// format of this MySQL flavor into a proto.ReplicationPosition struct.
// ParseReplicationPosition parses a replication position in
// the canonical format of this MySQL flavor into a
// proto.ReplicationPosition struct.
ParseReplicationPosition(string) (proto.ReplicationPosition, error)
// SendBinlogDumpCommand sends the flavor-specific version of the
// COM_BINLOG_DUMP command to start dumping raw binlog events over a slave
// connection, starting at a given GTID.
// SendBinlogDumpCommand sends the flavor-specific version of
// the COM_BINLOG_DUMP command to start dumping raw binlog
// events over a slave connection, starting at a given GTID.
SendBinlogDumpCommand(mysqld *Mysqld, conn *SlaveConnection, startPos proto.ReplicationPosition) error
// MakeBinlogEvent takes a raw packet from the MySQL binlog stream connection
// and returns a BinlogEvent through which the packet can be examined.
// MakeBinlogEvent takes a raw packet from the MySQL binlog
// stream connection and returns a BinlogEvent through which
// the packet can be examined.
MakeBinlogEvent(buf []byte) blproto.BinlogEvent
// WaitMasterPos waits until slave replication reaches at least targetPos.
// WaitMasterPos waits until slave replication reaches at
// least targetPos.
WaitMasterPos(mysqld *Mysqld, targetPos proto.ReplicationPosition, waitTimeout time.Duration) error
// EnableBinlogPlayback prepares the server to play back events from a binlog stream.
// Whatever it does for a given flavor, it must be idempotent.
// EnableBinlogPlayback prepares the server to play back
// events from a binlog stream. Whatever it does for a given
// flavor, it must be idempotent.
EnableBinlogPlayback(mysqld *Mysqld) error
// DisableBinlogPlayback returns the server to the normal state after playback is done.
// Whatever it does for a given flavor, it must be idempotent.
// DisableBinlogPlayback returns the server to the normal
// state after playback is done. Whatever it does for a given
// flavor, it must be idempotent.
DisableBinlogPlayback(mysqld *Mysqld) error
}

Просмотреть файл

@ -125,9 +125,7 @@ func (*mariaDB10) SetMasterCommands(params *sqldb.ConnParams, masterHost string,
changeMasterTo := "CHANGE MASTER TO\n " + strings.Join(args, ",\n ")
return []string{
"STOP SLAVE",
changeMasterTo,
"START SLAVE",
}, nil
}

Просмотреть файл

@ -542,11 +542,24 @@ func (agent *ActionAgent) SetMaster(ctx context.Context, parent topo.TabletAlias
return err
}
// See if we are replicating at all
replicating := false
rs, err := agent.MysqlDaemon.SlaveStatus()
if err == nil && (rs.SlaveIORunning || rs.SlaveSQLRunning) {
replicating = true
}
// TODO(alainjobart) fix the hardcoding of MasterConnectRetry
cmds, err := agent.MysqlDaemon.SetMasterCommands(ti.Hostname, ti.Portmap["mysql"], 10)
if err != nil {
return err
}
if replicating {
newCmds := []string{"STOP SLAVE"}
newCmds = append(newCmds, cmds...)
newCmds = append(newCmds, "START SLAVE")
cmds = newCmds
}
if err := agent.MysqlDaemon.ExecuteSuperQueryList(cmds); err != nil {
return err
@ -567,7 +580,7 @@ func (agent *ActionAgent) SetMaster(ctx context.Context, parent topo.TabletAlias
// if needed, wait until we get the replicated row, or our
// context times out
if timeCreatedNS == 0 {
if !replicating || timeCreatedNS == 0 {
return nil
}
return agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS)

Просмотреть файл

@ -74,6 +74,12 @@ var commands = []commandGroup{
command{"SetReadWrite", commandSetReadWrite,
"[<tablet alias>]",
"Sets the tablet as ReadWrite."},
command{"StartSlave", commandStartSlave,
"[<tablet alias>]",
"Starts replication on the slave."},
command{"StopSlave", commandStopSlave,
"[<tablet alias>]",
"Stops replication on the slave."},
command{"ChangeSlaveType", commandChangeSlaveType,
"[-force] [-dry-run] <tablet alias> <tablet type>",
"Change the db type for this tablet if possible. This is mostly for arranging replicas - it will not convert a master.\n" +
@ -715,6 +721,44 @@ func commandSetReadWrite(ctx context.Context, wr *wrangler.Wrangler, subFlags *f
return wr.TabletManagerClient().SetReadWrite(ctx, ti)
}
// commandStartSlave implements the StartSlave vtctl action. It requires
// exactly one positional argument, a tablet alias, reads the matching
// tablet record from the topology server, and asks the tablet manager
// client to start replication on that tablet.
func commandStartSlave(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	parseErr := subFlags.Parse(args)
	if parseErr != nil {
		return parseErr
	}
	// Exactly one positional argument is accepted: the tablet alias.
	if subFlags.NArg() != 1 {
		return fmt.Errorf("action StartSlave requires <tablet alias>")
	}

	alias, err := topo.ParseTabletAliasString(subFlags.Arg(0))
	if err != nil {
		return err
	}

	tablet, err := wr.TopoServer().GetTablet(alias)
	if err != nil {
		return fmt.Errorf("failed reading tablet %v: %v", alias, err)
	}

	return wr.TabletManagerClient().StartSlave(ctx, tablet)
}
// commandStopSlave implements the StopSlave vtctl action. It requires
// exactly one positional argument, a tablet alias, reads the matching
// tablet record from the topology server, and asks the tablet manager
// client to stop replication on that tablet.
func commandStopSlave(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	parseErr := subFlags.Parse(args)
	if parseErr != nil {
		return parseErr
	}
	// Exactly one positional argument is accepted: the tablet alias.
	if subFlags.NArg() != 1 {
		return fmt.Errorf("action StopSlave requires <tablet alias>")
	}

	alias, err := topo.ParseTabletAliasString(subFlags.Arg(0))
	if err != nil {
		return err
	}

	tablet, err := wr.TopoServer().GetTablet(alias)
	if err != nil {
		return fmt.Errorf("failed reading tablet %v: %v", alias, err)
	}

	return wr.TabletManagerClient().StopSlave(ctx, tablet)
}
func commandChangeSlaveType(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
force := subFlags.Bool("force", false, "will change the type in zookeeper, and not run hooks")
dryRun := subFlags.Bool("dry-run", false, "just list the proposed change")

Просмотреть файл

@ -52,7 +52,7 @@ func TestEmergencyReparentShard(t *testing.T) {
oldMaster.StartActionLoop(t, wr)
defer oldMaster.StopActionLoop(t)
// good slave 1
// good slave 1 is replicating
goodSlave1.FakeMysqlDaemon.ReadOnly = true
goodSlave1.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
GTIDSet: myproto.MariadbGTID{
@ -61,15 +61,21 @@ func TestEmergencyReparentShard(t *testing.T) {
Sequence: 455,
},
}
goodSlave1.FakeMysqlDaemon.CurrentSlaveStatus = &myproto.ReplicationStatus{
SlaveIORunning: true,
SlaveSQLRunning: true,
}
goodSlave1.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v,%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"], 10)
goodSlave1.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"STOP SLAVE",
"set master cmd 1",
"START SLAVE",
}
goodSlave1.StartActionLoop(t, wr)
defer goodSlave1.StopActionLoop(t)
// good slave 2
// good slave 2 is not replicating
goodSlave2.FakeMysqlDaemon.ReadOnly = true
goodSlave2.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
GTIDSet: myproto.MariadbGTID{

Просмотреть файл

@ -67,17 +67,23 @@ func TestPlannedReparentShard(t *testing.T) {
defer oldMaster.StopActionLoop(t)
oldMaster.Agent.QueryServiceControl.(*tabletserver.TestQueryServiceControl).QueryServiceEnabled = true
// good slave 1
// good slave 1 is replicating
goodSlave1.FakeMysqlDaemon.ReadOnly = true
goodSlave1.FakeMysqlDaemon.CurrentSlaveStatus = &myproto.ReplicationStatus{
SlaveIORunning: true,
SlaveSQLRunning: true,
}
goodSlave1.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v,%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"], 10)
goodSlave1.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
"STOP SLAVE",
"set master cmd 1",
"START SLAVE",
}
goodSlave1.StartActionLoop(t, wr)
defer goodSlave1.StopActionLoop(t)
// good slave 2
// good slave 2 is not replicating
goodSlave2.FakeMysqlDaemon.ReadOnly = true
goodSlave2.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v,%v", newMaster.Tablet.Hostname, newMaster.Tablet.Portmap["mysql"], 10)
goodSlave2.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}

Просмотреть файл

@ -613,8 +613,10 @@ class TestReparent(unittest.TestCase):
utils.pause('check orphan')
# reparent the tablet, should catch up on replication really quickly
# reparent the tablet (will not start replication, so we have to
# do it ourselves), then it should catch up on replication really quickly
utils.run_vtctl(['ReparentTablet', tablet_41983.tablet_alias])
utils.run_vtctl(['StartSlave', tablet_41983.tablet_alias])
# wait until it gets the data
timeout = 10.0