Merge pull request #6618 from planetscale/ss-tm2-more-logs

tm: add more logging to checkMastership
This commit is contained in:
Deepthi Sigireddi 2020-08-25 18:22:07 -07:00 коммит произвёл GitHub
Родитель 6f1402d3e2 5fc7992948
Коммит 8917adfc5c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файла: 37 добавлений и 3 удаления

Просмотреть файл

@ -459,6 +459,7 @@ func (tm *TabletManager) checkMastership(ctx context.Context, si *topo.ShardInfo
case topo.IsErrType(err, topo.NoNode):
// There's no existing tablet record, so we can assume
// no one has left us a message to step down.
log.Infof("Shard master alias matches, but there is no existing tablet record. Switching to master with 'Now' as time")
tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
tablet.Type = topodatapb.TabletType_MASTER
// Update the master term start time (current value is 0) because we
@ -468,12 +469,19 @@ func (tm *TabletManager) checkMastership(ctx context.Context, si *topo.ShardInfo
})
case err == nil:
if oldTablet.Type == topodatapb.TabletType_MASTER {
log.Infof("Shard master alias matches, and existing tablet agrees. Switching to master with tablet's master term start time: %v", oldTablet.MasterTermStartTime)
// We're marked as master in the shard record,
// and our existing tablet record agrees.
tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
tablet.Type = topodatapb.TabletType_MASTER
tablet.MasterTermStartTime = oldTablet.MasterTermStartTime
})
} else {
log.Warningf("Shard master alias matches, but existing tablet is not master. Switching to master with the shard's master term start time: %v", oldTablet.MasterTermStartTime)
tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
tablet.Type = topodatapb.TabletType_MASTER
tablet.MasterTermStartTime = si.MasterTermStartTime
})
}
default:
return vterrors.Wrap(err, "InitTablet failed to read existing tablet record")
@ -490,10 +498,13 @@ func (tm *TabletManager) checkMastership(ctx context.Context, si *topo.ShardInfo
oldMasterTermStartTime := oldTablet.GetMasterTermStartTime()
currentShardTime := si.GetMasterTermStartTime()
if oldMasterTermStartTime.After(currentShardTime) {
log.Infof("Shard master alias does not match, but the tablet's master term start time is newer. Switching to master with tablet's master term start time: %v", oldTablet.MasterTermStartTime)
tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) {
tablet.Type = topodatapb.TabletType_MASTER
tablet.MasterTermStartTime = oldTablet.MasterTermStartTime
})
} else {
log.Infof("Existing tablet type is master, but the shard record has a different master with a newer timestamp. Remaining a replica")
}
}
default:

Просмотреть файл

@ -218,9 +218,13 @@ func TestCheckMastership(t *testing.T) {
// 2. Update shard's master to our alias, then try to init again.
// (This simulates the case where the MasterAlias in the shard record says
// that we are the master but the tablet record says otherwise. In that case,
// we assume we are not the MASTER.)
// we become master by inheriting the shard record's timestamp.)
now := time.Now()
_, err = ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
si.MasterAlias = alias
si.MasterTermStartTime = logutil.TimeToProto(now)
// Reassign to now for easier comparison.
now = si.GetMasterTermStartTime()
return nil
})
require.NoError(t, err)
@ -228,9 +232,9 @@ func TestCheckMastership(t *testing.T) {
require.NoError(t, err)
ti, err = ts.GetTablet(ctx, alias)
require.NoError(t, err)
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
assert.Equal(t, topodatapb.TabletType_MASTER, ti.Type)
ter0 := ti.GetMasterTermStartTime()
assert.True(t, ter0.IsZero())
assert.Equal(t, now, ter0)
tm.Stop()
// 3. Delete the tablet record. The shard record still says that we are the
@ -291,6 +295,25 @@ func TestCheckMastership(t *testing.T) {
ter4 := ti.GetMasterTermStartTime()
assert.Equal(t, ter1, ter4)
tm.Stop()
// 7. If the shard record shows a different master with a newer
// timestamp, we remain replica.
_, err = ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
si.MasterAlias = otherAlias
si.MasterTermStartTime = logutil.TimeToProto(ter4.Add(10 * time.Second))
return nil
})
require.NoError(t, err)
tablet.Type = topodatapb.TabletType_REPLICA
tablet.MasterTermStartTime = nil
err = tm.Start(tablet, 0)
require.NoError(t, err)
ti, err = ts.GetTablet(ctx, alias)
require.NoError(t, err)
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
ter5 := ti.GetMasterTermStartTime()
assert.True(t, ter5.IsZero())
tm.Stop()
}
func TestStartCheckMysql(t *testing.T) {