- if there's more than 1 backup the correct one should be chosen
- update data on base keyspace after backup and ensure recovery keyspace does
not see the change
- recover a sharded keyspace

Signed-off-by: deepthi <deepthi@planetscale.com>
This commit is contained in:
deepthi 2019-09-04 21:43:37 -07:00
Родитель eef29d1061
Коммит 97bde32f67
2 изменённых файлов: 324 добавлений и 20 удалений

Просмотреть файл

@ -234,7 +234,7 @@ class TestRecovery(unittest.TestCase):
vs = utils.run_vtctl_json(['GetVSchema', 'recovery_keyspace'])
logging.debug('recovery_keyspace vschema: %s', str(vs))
# check the new replica does not have the data
# check the new replica has only 1 row
self._check_data(tablet_replica2, 1, 'replica2 tablet should not have new data')
# check that the restored replica has the right local_metadata
@ -246,6 +246,19 @@ class TestRecovery(unittest.TestCase):
self.assertEqual(metadata['ClusterAlias'], 'recovery_keyspace.0')
self.assertEqual(metadata['DataCenter'], 'test_nj')
# update original 1st row in master
tablet_master.mquery(
'vt_test_keyspace',
"update vt_insert_test set msg='new msg' where id=1", write=True)
# verify that master has new value
result = tablet_master.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'new msg')
# verify that restored replica has old value
result = tablet_replica2.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'test 1')
# start vtgate
vtgate = utils.VtGate()
vtgate.start(tablets=[
@ -267,7 +280,14 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 2)
# check that new tablet is accessible by using ks.table
cursor.execute('select msg from vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'new msg')
# check that new keyspace is accessible by using ks.table
cursor.execute('select count(*) from recovery_keyspace.vt_insert_test', {})
result = cursor.fetchall()
if not result:
@ -275,7 +295,14 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 1)
# check that new tablet is accessible with 'use ks'
cursor.execute('select msg from recovery_keyspace.vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'test 1')
# check that new keyspace is accessible with 'use ks'
cursor.execute('use recovery_keyspace@replica', {})
cursor.execute('select count(*) from vt_insert_test', {})
result = cursor.fetchall()
@ -284,6 +311,13 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 1)
cursor.execute('select msg from recovery_keyspace.vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'test 1')
# TODO check that new tablet is accessible with 'use ks:shard'
# this currently does not work through the python client, though it works from mysql client
#cursor.execute('use recovery_keyspace:0@replica', {})
@ -360,6 +394,19 @@ class TestRecovery(unittest.TestCase):
# check the new replica does not have the data
self._check_data(tablet_replica2, 1, 'replica2 tablet should not have new data')
# update original 1st row in master
tablet_master.mquery(
'vt_test_keyspace',
"update vt_insert_test set msg='new msg 1' where id=1", write=True)
# verify that master has new value
result = tablet_master.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'new msg 1')
# verify that restored replica has old value
result = tablet_replica2.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'test 1')
# take another backup on the replica
utils.run_vtctl(['Backup', tablet_replica1.tablet_alias], auto_log=True)
@ -369,6 +416,7 @@ class TestRecovery(unittest.TestCase):
self._check_data(tablet_replica1, 3, 'replica1 tablet getting data')
# now bring up the other replica, letting it restore from backup2.
# this also validates that if there are multiple backups, the most recent one is used
self._restore(tablet_replica3, 'recovery_ks2')
vs = utils.run_vtctl(['GetVSchema', 'recovery_ks2'])
@ -377,6 +425,19 @@ class TestRecovery(unittest.TestCase):
# check the new replica does not have the latest data
self._check_data(tablet_replica3, 2, 'replica3 tablet should not have new data')
# update original 1st row in master again
tablet_master.mquery(
'vt_test_keyspace',
"update vt_insert_test set msg='new msg 2' where id=1", write=True)
# verify that master has new value
result = tablet_master.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'new msg 2')
# verify that restored replica has correct value
result = tablet_replica3.mquery('vt_test_keyspace', 'select msg from vt_insert_test where id=1')
self.assertEqual(result[0][0], 'new msg 1')
# start vtgate
vtgate = utils.VtGate()
vtgate.start(tablets=all_tablets, tablet_types_to_wait='REPLICA')
@ -397,7 +458,14 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 3)
# check that new tablet is accessible by using ks.table
cursor.execute('select msg from vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'new msg 2')
# check that new keyspace is accessible by using ks.table
cursor.execute('select count(*) from recovery_ks1.vt_insert_test', {})
result = cursor.fetchall()
if not result:
@ -405,7 +473,14 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 1)
# check that new tablet is accessible by using ks.table
cursor.execute('select msg from recovery_ks1.vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'test 1')
# check that new keyspace is accessible by using ks.table
cursor.execute('select count(*) from recovery_ks2.vt_insert_test', {})
result = cursor.fetchall()
if not result:
@ -413,6 +488,13 @@ class TestRecovery(unittest.TestCase):
else:
self.assertEqual(result[0][0], 2)
cursor.execute('select msg from recovery_ks2.vt_insert_test where id=1', {})
result = cursor.fetchall()
if not result:
self.fail('Result cannot be null')
else:
self.assertEqual(result[0][0], 'new msg 1')
# TODO check that new tablet is accessible with 'use ks:shard'
# this currently does not work through the python client, though it works from mysql client
#cursor.execute('use recovery_ks1:0@replica', {})

Просмотреть файл

@ -34,6 +34,7 @@ tablet_replica1 = tablet.Tablet()
tablet_rdonly = tablet.Tablet()
# to use for recovery keyspace
tablet_replica2 = tablet.Tablet()
tablet_replica3 = tablet.Tablet()
# split shards
# range '' - 80
@ -45,7 +46,7 @@ shard_1_master = tablet.Tablet()
shard_1_replica = tablet.Tablet()
shard_1_rdonly = tablet.Tablet()
all_tablets = [tablet_master, tablet_replica1, tablet_replica2, tablet_rdonly,
all_tablets = [tablet_master, tablet_replica1, tablet_replica2, tablet_replica3, tablet_rdonly,
shard_0_master, shard_0_replica, shard_0_rdonly, shard_1_master, shard_1_replica, shard_1_rdonly]
def setUpModule():
@ -104,11 +105,12 @@ class TestShardedRecovery(unittest.TestCase):
t.set_semi_sync_enabled(master=False, slave=False)
t.clean_dbs()
for backup in self._list_backups():
self._remove_backup(backup)
for shard in ['0', '-80', '80-']:
for backup in self._list_backups(shard):
self._remove_backup(backup, shard)
_create_vt_insert_test = '''create table vt_insert_test (
id bigint auto_increment,
id bigint,
msg varchar(64),
primary key (id)
) Engine=InnoDB'''
@ -136,8 +138,8 @@ class TestShardedRecovery(unittest.TestCase):
"""Add a single row with value 'index' to the given tablet."""
t.mquery(
'vt_test_keyspace',
"insert into vt_insert_test (msg) values ('test %s')" %
index, write=True)
"insert into vt_insert_test (id, msg) values (%d, 'test %s')" %
(index, index), write=True)
def _check_data(self, t, count, msg):
"""Check that the specified tablet has the expected number of rows."""
@ -155,7 +157,7 @@ class TestShardedRecovery(unittest.TestCase):
logging.exception('exception waiting for data to replicate')
timeout = utils.wait_step(msg, timeout)
def _restore(self, t, keyspace):
def _restore(self, t, keyspace, shard):
"""Erase mysql/tablet dir, then start tablet with restore enabled."""
self._reset_tablet_dir(t)
@ -176,7 +178,7 @@ class TestShardedRecovery(unittest.TestCase):
t.start_vttablet(wait_for_state='SERVING',
init_tablet_type='replica',
init_keyspace=keyspace,
init_shard='0',
init_shard=shard,
supports_backups=True,
extra_args=xtra_args)
@ -189,21 +191,21 @@ class TestShardedRecovery(unittest.TestCase):
proc = t.init_mysql()
utils.wait_procs([proc])
def _list_backups(self, shard):
  """Get a list of backup names for one shard of test_keyspace.

  Args:
    shard: shard name string, e.g. '0', '-80' or '80-'.

  Returns:
    A list of backup name strings, one per line of vtctl output.
  """
  # NOTE(review): the diff rendering left both the old (hard-coded '0') and
  # new (parameterized) versions of these lines in place; this is the
  # post-change version only.
  backups, _ = utils.run_vtctl(tablet.get_backup_storage_flags() +
                               ['ListBackups', 'test_keyspace/%s' % shard],
                               mode=utils.VTCTL_VTCTL, trap_output=True)
  return backups.splitlines()
def _remove_backup(self, backup, shard):
  """Remove a named backup from one shard of test_keyspace.

  Args:
    backup: backup name as returned by _list_backups.
    shard: shard name string, e.g. '0', '-80' or '80-'.
  """
  # NOTE(review): the diff rendering left both the old (hard-coded '0') and
  # new (parameterized) versions of these lines in place; this is the
  # post-change version only.
  utils.run_vtctl(
      tablet.get_backup_storage_flags() +
      ['RemoveBackup', 'test_keyspace/%s' % shard, backup],
      auto_log=True, mode=utils.VTCTL_VTCTL)
def test_sharded_recovery(self):
def test_unsharded_recovery_after_sharding(self):
"""Test recovery from backup flow.
test_recovery will:
@ -234,7 +236,7 @@ class TestShardedRecovery(unittest.TestCase):
utils.run_vtctl(['Backup', tablet_replica1.tablet_alias], auto_log=True)
# check that the backup shows up in the listing
backups = self._list_backups()
backups = self._list_backups('0')
logging.debug('list of backups: %s', backups)
self.assertEqual(len(backups), 1)
self.assertTrue(backups[0].endswith(tablet_replica1.tablet_alias))
@ -320,7 +322,7 @@ class TestShardedRecovery(unittest.TestCase):
auto_log=True)
# now bring up the recovery keyspace and a tablet, letting it restore from backup.
self._restore(tablet_replica2, 'recovery_keyspace')
self._restore(tablet_replica2, 'recovery_keyspace', '0')
# check the new replica does not have the data
self._check_data(tablet_replica2, 2, 'replica2 tablet should not have new data')
@ -392,5 +394,225 @@ class TestShardedRecovery(unittest.TestCase):
tablet_replica2.kill_vttablet()
vtgate.kill()
def test_sharded_recovery(self):
  """Test recovery-from-backup after the base keyspace has been resharded.

  This test will:
  - create a shard with master and replica1 only
  - run InitShardMaster
  - insert some data
  - perform a resharding of test_keyspace into -80 and 80-
  - take a backup of both new shards
  - insert more data on the masters of both shards
  - create a recovery keyspace
  - bring up tablet_replica2 and tablet_replica3 in the new keyspace
  - check that the new tablets do not have data created after backup
  - check that vtgate queries work correctly
  """
  # insert data on master, wait for replica to get it
  utils.run_vtctl(['ApplySchema',
                   '-sql', self._create_vt_insert_test,
                   'test_keyspace'],
                  auto_log=True)
  self._insert_data(tablet_master, 1)
  self._check_data(tablet_replica1, 1, 'replica1 tablet getting data')

  # insert more data on the master
  self._insert_data(tablet_master, 2)

  utils.run_vtctl(['ApplyVSchema',
                   '-vschema', self._vschema_json,
                   'test_keyspace'],
                  auto_log=True)

  # create the split shards
  shard_0_master.init_tablet(
      'replica',
      keyspace='test_keyspace',
      shard='-80',
      tablet_index=0)
  shard_0_replica.init_tablet(
      'replica',
      keyspace='test_keyspace',
      shard='-80',
      tablet_index=1)
  shard_0_rdonly.init_tablet(
      'rdonly',
      keyspace='test_keyspace',
      shard='-80',
      tablet_index=2)
  shard_1_master.init_tablet(
      'replica',
      keyspace='test_keyspace',
      shard='80-',
      tablet_index=0)
  shard_1_replica.init_tablet(
      'replica',
      keyspace='test_keyspace',
      shard='80-',
      tablet_index=1)
  shard_1_rdonly.init_tablet(
      'rdonly',
      keyspace='test_keyspace',
      shard='80-',
      tablet_index=2)

  for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
            shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.start_vttablet(wait_for_state=None,
                     binlog_use_v3_resharding_mode=True)

  for t in [shard_0_master, shard_0_replica, shard_0_rdonly,
            shard_1_master, shard_1_replica, shard_1_rdonly]:
    t.wait_for_vttablet_state('NOT_SERVING')

  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
                   shard_0_master.tablet_alias], auto_log=True)
  utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
                   shard_1_master.tablet_alias], auto_log=True)

  for t in [shard_0_replica, shard_1_replica]:
    utils.wait_for_tablet_type(t.tablet_alias, 'replica')

  sharded_tablets = [shard_0_master, shard_0_replica, shard_0_rdonly,
                     shard_1_master, shard_1_replica, shard_1_rdonly]
  for t in sharded_tablets:
    t.wait_for_vttablet_state('SERVING')

  # we need to create the schema, and the worker will do data copying
  for keyspace_shard in ('test_keyspace/-80', 'test_keyspace/80-'):
    utils.run_vtctl(['CopySchemaShard',
                     'test_keyspace/0',
                     keyspace_shard],
                    auto_log=True)

  utils.run_vtctl(
      ['SplitClone', 'test_keyspace', '0', '-80,80-'], auto_log=True)

  utils.run_vtctl(
      ['MigrateServedTypes', 'test_keyspace/0', 'rdonly'], auto_log=True)
  utils.run_vtctl(
      ['MigrateServedTypes', 'test_keyspace/0', 'replica'], auto_log=True)
  # then serve master from the split shards
  utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/0', 'master'],
                  auto_log=True)

  # remember how many of the pre-backup rows landed on each new shard, so we
  # can verify the recovery tablets restore exactly those counts later
  result = shard_0_master.mquery('vt_test_keyspace',
                                 'select count(*) from vt_insert_test')
  shard_0_count = result[0][0]
  result = shard_1_master.mquery('vt_test_keyspace',
                                 'select count(*) from vt_insert_test')
  shard_1_count = result[0][0]

  # backup the new shards
  utils.run_vtctl(['Backup', shard_0_replica.tablet_alias], auto_log=True)
  utils.run_vtctl(['Backup', shard_1_replica.tablet_alias], auto_log=True)

  # check that the backups show up in the listing, one per shard
  backups = self._list_backups('-80')
  logging.debug('list of backups: %s', backups)
  self.assertEqual(len(backups), 1)
  self.assertTrue(backups[0].endswith(shard_0_replica.tablet_alias))
  backups = self._list_backups('80-')
  logging.debug('list of backups: %s', backups)
  self.assertEqual(len(backups), 1)
  self.assertTrue(backups[0].endswith(shard_1_replica.tablet_alias))

  # start vtgate against the split-shard masters only
  vtgate = utils.VtGate()
  vtgate.start(tablets=[
      shard_0_master, shard_1_master
  ], tablet_types_to_wait='MASTER')
  utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1)

  vtgate_conn = get_connection()
  cursor = vtgate_conn.cursor(
      tablet_type='master', keyspace=None, writable=True)

  # insert more data on the masters, after the backups were taken
  for i in [3, 4]:
    cursor.execute('insert into vt_insert_test (id, msg) values (:id, :msg)',
                   {'id': i, 'msg': 'test %s' % i})

  vtgate_conn.close()
  vtgate.kill()

  # now bring up the recovery keyspace and 2 tablets,
  # letting them restore from the per-shard backups.
  self._restore(tablet_replica2, 'recovery_keyspace', '-80')
  self._restore(tablet_replica3, 'recovery_keyspace', '80-')

  # check the new replicas have only the pre-backup number of rows
  self._check_data(tablet_replica2, shard_0_count,
                   'replica2 tablet should not have new data')
  self._check_data(tablet_replica3, shard_1_count,
                   'replica3 tablet should not have new data')

  # remove the original tablets in the original shard
  tablet.kill_tablets([tablet_master, tablet_replica1, tablet_rdonly])
  for t in [tablet_replica1, tablet_rdonly]:
    utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
  utils.run_vtctl(['DeleteTablet', '-allow_master',
                   tablet_master.tablet_alias], auto_log=True)

  # rebuild the serving graph, all mentions of the old shards should be gone
  utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)

  # delete the original shard
  utils.run_vtctl(['DeleteShard', 'test_keyspace/0'], auto_log=True)

  # start vtgate again, now serving replicas of both keyspaces
  vtgate = utils.VtGate()
  vtgate.start(tablets=[
      shard_0_master, shard_0_replica, shard_1_master, shard_1_replica,
      tablet_replica2, tablet_replica3
  ], tablet_types_to_wait='REPLICA')
  # BUG FIX: the original waited twice each on 'test_keyspace.-80.master' and
  # 'test_keyspace.80-.replica' (copy-paste duplication) and never waited on
  # 'test_keyspace.-80.replica' / 'test_keyspace.80-.master', even though
  # those tablets are in the start list above. Wait on all four distinct
  # endpoints instead.
  utils.vtgate.wait_for_endpoints('test_keyspace.-80.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.-80.replica', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.80-.master', 1)
  utils.vtgate.wait_for_endpoints('test_keyspace.80-.replica', 1)
  utils.vtgate.wait_for_endpoints('recovery_keyspace.-80.replica', 1)
  utils.vtgate.wait_for_endpoints('recovery_keyspace.80-.replica', 1)

  # check that an unqualified query routes to the base keyspace and sees all
  # 4 rows (i.e. it is not routed to the recovery tablets)
  vtgate_conn = get_connection()
  cursor = vtgate_conn.cursor(
      tablet_type='replica', keyspace=None, writable=True)
  cursor.execute('select count(*) from vt_insert_test', {})
  result = cursor.fetchall()
  if not result:
    self.fail('Result cannot be null')
  else:
    self.assertEqual(result[0][0], 4)

  # check that new keyspace is accessible by using ks.table
  cursor.execute('select count(*) from recovery_keyspace.vt_insert_test', {})
  result = cursor.fetchall()
  if not result:
    self.fail('Result cannot be null')
  else:
    self.assertEqual(result[0][0], 2)

  # check that new keyspace is accessible with 'use ks'
  cursor.execute('use recovery_keyspace@replica', {})
  cursor.execute('select count(*) from vt_insert_test', {})
  result = cursor.fetchall()
  if not result:
    self.fail('Result cannot be null')
  else:
    self.assertEqual(result[0][0], 2)

  # TODO check that new tablet is accessible with 'use ks:shard'
  # this currently does not work through the python client,
  # though it works from mysql client
  #cursor.execute('use recovery_keyspace:0@replica', {})
  #cursor.execute('select count(*) from vt_insert_test', {})
  #result = cursor.fetchall()
  #if not result:
  #  self.fail('Result cannot be null')
  #else:
  #  self.assertEqual(result[0][0], 1)

  vtgate_conn.close()
  tablet_replica2.kill_vttablet()
  tablet_replica3.kill_vttablet()
  vtgate.kill()
# Script entry point: delegate to the project's test harness runner.
if __name__ == '__main__':
  utils.main()