vitess-gh/test/reparent.py

786 lines
32 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright 2019 The Vitess Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import time
import unittest
import environment
import utils
import tablet
from mysql_flavor import mysql_flavor
from protocols_flavor import protocols_flavor
from vtproto import topodata_pb2
# The four tablets shared by every test in this module. Each variable is named
# after the numeric id passed to tablet.Tablet(); the ids are constructed in
# the same left-to-right order as the names they are bound to.
tablet_62344, tablet_62044, tablet_41983, tablet_31981 = map(
    tablet.Tablet, (62344, 62044, 41983, 31981))
def setUpModule():
  """Sets up the topo server, the MySQL instances and vtctld for the module.

  If any step raises, tearDownModule() is invoked and the exception is
  re-raised.
  """
  try:
    environment.topo_server().setup()

    # Launch every MySQL instance (external to the tests) before starting
    # vtctld, and only then wait for the MySQL init processes to finish.
    mysql_init_procs = [
        t.init_mysql()
        for t in (tablet_62344, tablet_62044, tablet_41983, tablet_31981)
    ]
    utils.Vtctld().start()
    utils.wait_procs(mysql_init_procs)
  except:
    # Deliberately catches everything: clean up, then propagate unchanged.
    tearDownModule()
    raise
def tearDownModule():
  """Tears down MySQL, the topo server and on-disk state for the module.

  utils.required_teardown() always runs; everything else is skipped when
  utils.options.skip_teardown is set.
  """
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  all_tablets = (tablet_62344, tablet_62044, tablet_41983, tablet_31981)

  # Start all MySQL teardowns first, then wait on them together. Errors are
  # not raised here so the remaining cleanup steps still run.
  mysql_teardown_procs = [t.teardown_mysql() for t in all_tablets]
  utils.wait_procs(mysql_teardown_procs, raise_on_error=False)

  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()

  for t in all_tablets:
    t.remove_tree()
class TestReparent(unittest.TestCase):
  """Integration tests for the vtctl reparenting commands.

  Every test creates 'test_keyspace', starts vttablets on some of the four
  module-level tablets, elects tablet_62344 with 'InitShardMaster -force' and
  then exercises PlannedReparentShard, EmergencyReparentShard,
  TabletExternallyReparented or ChangeSlaveType.
  """

  def tearDown(self):
    """Wipes the topology and resets replication and databases on all tablets."""
    tablet.Tablet.check_vttablet_count()
    environment.topo_server().wipe()
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
      t.reset_replication()
      t.set_semi_sync_enabled(master=False)
      t.clean_dbs(include_vt=True)
    super(TestReparent, self).tearDown()

  # Schema of the table used to check that writes on the master replicate.
  _create_vt_insert_test = '''create table vt_insert_test (
id bigint,
msg varchar(64),
primary key (id)
) Engine=InnoDB'''

  def _init_replicas_and_wait(self, tablets, shard_id='0'):
    """Starts a vttablet of type replica on each tablet, then waits for all.

    All tablets are initialized first without waiting, and only afterwards is
    each one awaited, in the order given.

    Args:
      tablets: tablets to initialize, in the order they should be started.
      shard_id: shard of 'test_keyspace' the tablets are added to.
    """
    for t in tablets:
      t.init_tablet('replica', 'test_keyspace', shard_id, start=True,
                    wait_for_start=False)
    for t in tablets:
      t.wait_for_vttablet_state('NOT_SERVING')

  def _populate_vt_insert_test(self, master_tablet, index):
    """Inserts the row (index, 'test <index>') into vt_insert_test.

    Args:
      master_tablet: tablet the insert is run on.
      index: integer used both as the row id and inside the message.
    """
    q = ("insert into vt_insert_test(id, msg) values (%d, 'test %d')" %
         (index, index))
    master_tablet.mquery('vt_test_keyspace', q, write=True)

  def _check_vt_insert_test(self, tablet_obj, index):
    """Polls tablet_obj until the row with the given id is visible.

    Args:
      tablet_obj: tablet to query.
      index: id of the row that is expected to show up.
    """
    # wait until it gets the data
    timeout = 10.0
    while True:
      result = tablet_obj.mquery(
          'vt_test_keyspace',
          'select msg from vt_insert_test where id=%d' % index)
      if len(result) == 1:
        break
      timeout = utils.wait_step('waiting for replication to catch up on %s' %
                                tablet_obj.tablet_alias,
                                timeout, sleep_time=0.1)

  def _check_master_tablet(self, t, port=None):
    """Makes sure the tablet type is master, and its health check agrees.

    Args:
      t: tablet expected to be the master.
      port: if not None, the expected value of the tablet's 'vt' port.
    """
    ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
    self.assertEqual(ti['type'], topodata_pb2.MASTER)
    # Compare against None explicitly so that any port value given is checked.
    if port is not None:
      self.assertEqual(ti['port_map']['vt'], port)

    # make sure the health stream is updated
    health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
                                   t.tablet_alias])
    self.assertIn('serving', health)
    self.assertEqual(health['target']['tablet_type'], topodata_pb2.MASTER)

  def test_master_to_spare_state_change_impossible(self):
    """ChangeSlaveType from master to spare is expected to fail."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    tablet_62344.create_db('vt_test_keyspace')

    tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True,
                             wait_for_start=True)

    utils.run_vtctl(['ChangeSlaveType', tablet_62344.tablet_alias, 'spare'],
                    expect_fail=True)
    tablet_62344.kill_vttablet()

  def test_reparent_down_master(self):
    """PlannedReparentShard fails with the master down; Emergency succeeds."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]
    for t in all_tablets:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and three slaves, and wait for them.
    self._init_replicas_and_wait(all_tablets)

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias], auto_log=True)
    utils.validate_topology()
    tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

    # Make the current master agent and database unavailable.
    tablet_62344.kill_vttablet()
    tablet_62344.shutdown_mysql().wait()

    # Perform a planned reparent operation, will try to contact
    # the current master and fail somewhat quickly
    _, stderr = utils.run_vtctl(['-wait-time', '5s',
                                 'PlannedReparentShard',
                                 '-keyspace_shard', 'test_keyspace/0',
                                 '-new_master', tablet_62044.tablet_alias],
                                expect_fail=True)
    self.assertIn(
        'current master must be healthy to perform planned reparent', stderr)

    # Run forced reparent operation, this should now proceed unimpeded.
    utils.run_vtctl(['EmergencyReparentShard',
                     '-keyspace_shard', 'test_keyspace/0',
                     '-new_master', tablet_62044.tablet_alias], auto_log=True)

    utils.validate_topology()
    self._check_master_tablet(tablet_62044)

    # insert data into the new master, check the connected slaves work
    self._populate_vt_insert_test(tablet_62044, 2)
    self._check_vt_insert_test(tablet_41983, 2)
    self._check_vt_insert_test(tablet_31981, 2)

    # bring back the old master as a slave, check that it catches up
    tablet_62344.start_mysql().wait()
    tablet_62344.init_tablet('replica', 'test_keyspace', '0', start=True,
                             wait_for_start=False)
    self._check_vt_insert_test(tablet_62344, 2)

    tablet.kill_tablets(
        [tablet_62344, tablet_62044, tablet_41983, tablet_31981])

  def test_reparent_cross_cell(self, shard_id='0'):
    """PlannedReparentShard from tablet_62344 to tablet_31981 succeeds."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]
    for t in all_tablets:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and three slaves. The slaves won't be
    # healthy as replication is not running.
    self._init_replicas_and_wait(all_tablets, shard_id)

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias], auto_log=True)
    utils.validate_topology(ping_tablets=True)

    self._check_master_tablet(tablet_62344)

    # Perform a graceful reparent operation to another cell.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/' + shard_id,
                     '-new_master', tablet_31981.tablet_alias], auto_log=True)
    utils.validate_topology()

    self._check_master_tablet(tablet_31981)

    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])

  def test_reparent_graceful_range_based(self):
    """Graceful reparent on a keyspace created with a sharding column."""
    utils.run_vtctl(['CreateKeyspace',
                     '--sharding_column_name', 'keyspace_id',
                     '--sharding_column_type', 'uint64',
                     'test_keyspace'])
    self._test_reparent_graceful('0000000000000000-ffffffffffffffff')

  def test_reparent_graceful(self):
    """Graceful reparent on shard '0' of a plain keyspace."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    self._test_reparent_graceful('0')

  def test_reparent_graceful_recovery(self):
    """PRS can perform a graceful recovery if all tablets are responding."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
    self._test_reparent_graceful('0', confused_master=True)

  def _test_reparent_graceful(self, shard_id, confused_master=False):
    """Runs PlannedReparentShard to tablet_62044 and checks the outcome.

    Also checks that the master's new port is registered after the vttablet
    is restarted on a different port.

    Args:
      shard_id: shard of 'test_keyspace' to run the test on.
      confused_master: if True, the new master's tablet record is reset to
        replica after the first reparent, before reparenting to it again.
    """
    # create the database so vttablets start, as they are serving
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
      t.create_db('vt_test_keyspace')

    # Start up a master mysql and vttablet. Unlike the slaves below, this call
    # does not pass wait_for_start=False.
    tablet_62344.init_tablet('replica', 'test_keyspace', shard_id, start=True)

    # Create a few slaves for testing reparenting.
    self._init_replicas_and_wait(
        [tablet_62044, tablet_41983, tablet_31981], shard_id)

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias])
    utils.validate_topology(ping_tablets=True)
    tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

    self._check_master_tablet(tablet_62344)

    utils.validate_topology()

    # Run this to make sure it succeeds.
    stdout, _ = utils.run_vtctl(['ShardReplicationPositions',
                                 'test_keyspace/' + shard_id],
                                trap_output=True)
    lines = stdout.splitlines()
    self.assertEqual(len(lines), 4)  # one master, three slaves
    self.assertIn('master', lines[0])  # master first

    # Perform a graceful reparent operation.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/' + shard_id,
                     '-new_master', tablet_62044.tablet_alias], auto_log=True)
    utils.validate_topology()

    if confused_master:
      # Simulate a master that forgets it's master and becomes replica.
      # PRS should be able to recover by reparenting to the same master again,
      # as long as all tablets are available to check that it's safe.
      tablet_62044.init_tablet('replica', 'test_keyspace', shard_id,
                               start=False)
      utils.run_vtctl(['RefreshState', tablet_62044.tablet_alias])

    # Perform a graceful reparent to the same master.
    # It should be idempotent, and should fix any inconsistencies if necessary.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/' + shard_id,
                     '-new_master', tablet_62044.tablet_alias], auto_log=True)
    utils.validate_topology()

    self._check_master_tablet(tablet_62044)

    # insert data into the new master, check the connected slaves work
    self._populate_vt_insert_test(tablet_62044, 1)
    self._check_vt_insert_test(tablet_41983, 1)
    self._check_vt_insert_test(tablet_62344, 1)

    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])

    # Test address correction.
    new_port = environment.reserve_ports(1)
    tablet_62044.start_vttablet(port=new_port)

    # Wait until the new address registers.
    timeout = 30.0
    while True:
      try:
        self._check_master_tablet(tablet_62044, port=new_port)
        break
      except protocols_flavor().client_error_exception_type():
        timeout = utils.wait_step('waiting for new port to register',
                                  timeout, sleep_time=0.1)

    tablet_62044.kill_vttablet()

  def test_reparent_slave_offline(self, shard_id='0'):
    """Reparenting should return error if replica vttablet is down."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]
    for t in all_tablets:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and three slaves, and wait for them.
    self._init_replicas_and_wait(all_tablets, shard_id)

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias])
    utils.validate_topology(ping_tablets=True)

    self._check_master_tablet(tablet_62344)

    # Kill one tablet so we seem offline
    tablet_31981.kill_vttablet()

    # Perform a graceful reparent operation.
    _, stderr = utils.run_vtctl(['PlannedReparentShard',
                                 '-keyspace_shard',
                                 'test_keyspace/' + shard_id,
                                 '-new_master', tablet_62044.tablet_alias],
                                expect_fail=True)
    self.assertIn('tablet test_ny-0000031981 SetMaster failed', stderr)

    # The command reported a failure, yet 62044 is expected to be master.
    self._check_master_tablet(tablet_62044)

    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983])

  def test_reparent_avoid(self):
    """Exercises PlannedReparentShard with the -avoid_master flag."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    started_tablets = [tablet_62344, tablet_62044, tablet_31981]
    for t in started_tablets:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and two slaves. The slaves won't be
    # healthy as replication is not running.
    self._init_replicas_and_wait(started_tablets)

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias], auto_log=True)

    utils.validate_topology(ping_tablets=True)
    self._check_master_tablet(tablet_62344)

    # Perform a reparent operation with avoid_master pointing to non-master. It
    # should succeed without doing anything.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/0',
                     '-avoid_master', tablet_62044.tablet_alias], auto_log=True)

    utils.validate_topology()
    self._check_master_tablet(tablet_62344)

    # Perform a reparent operation with avoid_master pointing to master.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/0',
                     '-avoid_master', tablet_62344.tablet_alias], auto_log=True)

    utils.validate_topology()
    # 62044 is in the same cell and 31981 is in a different cell, so we must
    # land on 62044
    self._check_master_tablet(tablet_62044)

    # If we kill the tablet in the same cell as master then reparent
    # -avoid_master will fail.
    tablet_62344.kill_vttablet()
    _, stderr = utils.run_vtctl(['PlannedReparentShard',
                                 '-keyspace_shard', 'test_keyspace/0',
                                 '-avoid_master', tablet_62044.tablet_alias],
                                auto_log=True,
                                expect_fail=True)
    self.assertIn('cannot find a tablet to reparent to', stderr)

    utils.validate_topology()
    self._check_master_tablet(tablet_62044)

    # NOTE(review): tablet_41983 is never started in this test and
    # tablet_62344 was already killed above; both are still passed here,
    # unchanged from the original cleanup.
    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])

  # assume a different entity is doing the reparent, and telling us it was done
  def test_reparent_from_outside(self):
    """External reparent with the old master still running."""
    self._test_reparent_from_outside(brutal=False)

  def test_reparent_from_outside_brutal(self):
    """External reparent after killing and deleting the old master."""
    self._test_reparent_from_outside(brutal=True)

  def _test_reparent_from_outside(self, brutal=False):
    """This test will start a master and 3 slaves.

    Then:
    - one slave will be the new master
    - one slave will be reparented to that new master
    - one slave will be busted and dead in the water
    and we'll call TabletExternallyReparented.

    Args:
      brutal: kills the old master first
    """
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    all_tablets = [tablet_62344, tablet_62044, tablet_41983, tablet_31981]
    for t in all_tablets:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and three slaves, and wait for them.
    self._init_replicas_and_wait(all_tablets)

    # Reparent as a starting point
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias], auto_log=True)

    # now manually reparent 1 out of 2 tablets
    # 62044 will be the new master
    # 31981 won't be re-parented, so it will be busted

    # Shutdown the old master first.
    if not brutal:
      tablet_62344.mquery('', mysql_flavor().demote_master_commands())

      # Get the position of the old master and wait for the new one to catch up.
      utils.wait_for_replication_pos(tablet_62344, tablet_62044)

    # Promote the new master.
    tablet_62044.mquery('', mysql_flavor().promote_slave_commands())
    new_pos = mysql_flavor().master_position(tablet_62044)
    logging.debug('New master position: %s', str(new_pos))
    # Use 'localhost' as hostname because Travis CI worker hostnames
    # are too long for MySQL replication.
    change_master_cmds = mysql_flavor().change_master_commands(
        'localhost',
        tablet_62044.mysql_port,
        new_pos)

    # 62344 will now be a slave of 62044
    tablet_62344.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
                        change_master_cmds +
                        ['START SLAVE'])

    # 41983 will be a slave of 62044
    tablet_41983.mquery('', ['STOP SLAVE'] +
                        change_master_cmds +
                        ['START SLAVE'])

    # in brutal mode, we kill the old master first
    # and delete its tablet record
    if brutal:
      tablet_62344.kill_vttablet()
      utils.run_vtctl(['DeleteTablet', '-allow_master',
                       tablet_62344.tablet_alias], auto_log=True)

    # Taken before the call below so the reported timestamp can be compared
    # against it.
    base_time = time.time()

    # update topology with the new server
    utils.run_vtctl(['TabletExternallyReparented', tablet_62044.tablet_alias],
                    mode=utils.VTCTL_VTCTL, auto_log=True)

    self._test_reparent_from_outside_check(brutal, base_time)

    if not brutal:
      tablet_62344.kill_vttablet()
    tablet.kill_tablets([tablet_31981, tablet_62044, tablet_41983])

  def _test_reparent_from_outside_check(self, brutal, base_time):
    """Verifies topology and health after TabletExternallyReparented.

    Args:
      brutal: whether the old master (62344) was deleted; if so it is not
        expected in the shard replication graph.
      base_time: time.time() value taken just before the external reparent.
    """
    # make sure the shard replication graph is fine
    shard_replication = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
                                              'test_keyspace/0'])
    actual_nodes = {
        '%s-%s' % (node['tablet_alias']['cell'], node['tablet_alias']['uid'])
        for node in shard_replication['nodes']
    }
    logging.debug('Got shard replication nodes: %s', sorted(actual_nodes))

    expected_nodes = {'test_nj-41983', 'test_nj-62044'}
    if not brutal:
      expected_nodes.add('test_nj-62344')
    self.assertEqual(expected_nodes, actual_nodes,
                     'Got unexpected nodes: %s != %s' % (
                         sorted(expected_nodes), sorted(actual_nodes)))

    # make sure the master status page says it's the master
    tablet_62044_master_status = tablet_62044.get_status()
    self.assertIn('Keyspace: test_keyspace Shard: 0 Tablet Type: MASTER',
                  tablet_62044_master_status)

    # make sure the master health stream says it's the master too
    # (health check is disabled on these servers, force it first)
    utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
    health = utils.run_vtctl_json(['VtTabletStreamHealth',
                                   '-count', '1',
                                   tablet_62044.tablet_alias])
    self.assertEqual(health['target']['tablet_type'], topodata_pb2.MASTER)

    # have to compare the int version, or the rounding errors can break.
    # assertGreaterEqual reports both operands when the check fails.
    self.assertGreaterEqual(
        health['tablet_externally_reparented_timestamp'], int(base_time))

  def test_reparent_with_down_slave(self, shard_id='0'):
    """See if a missing slave can be safely reparented after the fact."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
      t.create_db('vt_test_keyspace')

    # Start the future master (62344) and three slaves, and wait for them.
    self._init_replicas_and_wait(
        [tablet_62344, tablet_62044, tablet_31981, tablet_41983], shard_id)

    # Force the slaves to reparent assuming that all the datasets are identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/' + shard_id,
                     tablet_62344.tablet_alias])
    utils.validate_topology(ping_tablets=True)
    tablet_62344.mquery('vt_test_keyspace', self._create_vt_insert_test)

    utils.wait_procs([tablet_41983.shutdown_mysql()])

    # Perform a graceful reparent operation. It will fail as one tablet is down.
    _, stderr = utils.run_vtctl(['PlannedReparentShard',
                                 '-keyspace_shard',
                                 'test_keyspace/' + shard_id,
                                 '-new_master', tablet_62044.tablet_alias],
                                expect_fail=True)
    self.assertIn('TabletManager.SetMaster on test_nj-0000041983 error', stderr)

    # insert data into the new master, check the connected slaves work
    self._populate_vt_insert_test(tablet_62044, 3)
    self._check_vt_insert_test(tablet_31981, 3)
    self._check_vt_insert_test(tablet_62344, 3)

    # restart mysql on the old slave, should still be connecting to the
    # old master
    utils.wait_procs([tablet_41983.start_mysql()])

    utils.pause('check orphan')

    # Use the same PlannedReparentShard command to fix up the tablet.
    utils.run_vtctl(['PlannedReparentShard',
                     '-keyspace_shard', 'test_keyspace/' + shard_id,
                     '-new_master', tablet_62044.tablet_alias])

    # wait until it gets the data
    self._check_vt_insert_test(tablet_41983, 3)

    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])

  def test_change_type_semi_sync(self):
    """Checks semi-sync settings as tablets switch between replica and rdonly."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # Create new names for tablets, so this test is less confusing.
    master = tablet_62344
    replica = tablet_62044
    rdonly1 = tablet_41983
    rdonly2 = tablet_31981

    # create the database so vttablets start, as they are serving
    for t in [master, replica, rdonly1, rdonly2]:
      t.create_db('vt_test_keyspace')

    # Start up a soon-to-be master, one replica and two rdonly.
    master.init_tablet('replica', 'test_keyspace', '0', start=True,
                       wait_for_start=False)
    replica.init_tablet('replica', 'test_keyspace', '0', start=True,
                        wait_for_start=False)
    rdonly1.init_tablet('rdonly', 'test_keyspace', '0', start=True,
                        wait_for_start=False)
    rdonly2.init_tablet('rdonly', 'test_keyspace', '0', start=True,
                        wait_for_start=False)
    for t in [master, replica, rdonly1, rdonly2]:
      t.wait_for_vttablet_state('NOT_SERVING')

    # Force the slaves to reparent assuming that all the datasets are
    # identical.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     master.tablet_alias], auto_log=True)
    utils.validate_topology(ping_tablets=True)
    self._check_master_tablet(master)

    # Stop replication on rdonly1, to make sure when we make it
    # replica it doesn't start again.
    # Note we do a similar test for replica -> rdonly below.
    utils.run_vtctl(['StopSlave', rdonly1.tablet_alias])

    # Check semi-sync on slaves.
    # The flag is only an indication of the value to use next time
    # we turn replication on, so also check the status.
    # rdonly1 is not replicating, so its status is off.
    replica.check_db_var('rpl_semi_sync_slave_enabled', 'ON')
    rdonly1.check_db_var('rpl_semi_sync_slave_enabled', 'OFF')
    rdonly2.check_db_var('rpl_semi_sync_slave_enabled', 'OFF')
    replica.check_db_status('rpl_semi_sync_slave_status', 'ON')
    rdonly1.check_db_status('rpl_semi_sync_slave_status', 'OFF')
    rdonly2.check_db_status('rpl_semi_sync_slave_status', 'OFF')

    # Change replica to rdonly while replicating, should turn off semi-sync,
    # and restart replication.
    utils.run_vtctl(['ChangeSlaveType', replica.tablet_alias, 'rdonly'],
                    auto_log=True)
    replica.check_db_var('rpl_semi_sync_slave_enabled', 'OFF')
    replica.check_db_status('rpl_semi_sync_slave_status', 'OFF')

    # Change rdonly1 to replica, should turn on semi-sync, and not start rep.
    utils.run_vtctl(['ChangeSlaveType', rdonly1.tablet_alias, 'replica'],
                    auto_log=True)
    rdonly1.check_db_var('rpl_semi_sync_slave_enabled', 'ON')
    rdonly1.check_db_status('rpl_semi_sync_slave_status', 'OFF')
    # Column positions used to index a row of 'show slave status'.
    slave_io_running = 10
    slave_sql_running = 11
    s = rdonly1.mquery('', 'show slave status')
    self.assertEqual(s[0][slave_io_running], 'No')
    self.assertEqual(s[0][slave_sql_running], 'No')

    # Now change from replica back to rdonly, make sure replication is
    # still not enabled.
    utils.run_vtctl(['ChangeSlaveType', rdonly1.tablet_alias, 'rdonly'],
                    auto_log=True)
    rdonly1.check_db_var('rpl_semi_sync_slave_enabled', 'OFF')
    rdonly1.check_db_status('rpl_semi_sync_slave_status', 'OFF')
    s = rdonly1.mquery('', 'show slave status')
    self.assertEqual(s[0][slave_io_running], 'No')
    self.assertEqual(s[0][slave_sql_running], 'No')

    # Change rdonly2 to replica, should turn on semi-sync, and restart rep.
    utils.run_vtctl(['ChangeSlaveType', rdonly2.tablet_alias, 'replica'],
                    auto_log=True)
    rdonly2.check_db_var('rpl_semi_sync_slave_enabled', 'ON')
    rdonly2.check_db_status('rpl_semi_sync_slave_status', 'ON')

    # Clean up.
    tablet.kill_tablets([master, replica, rdonly1, rdonly2])

  def test_reparent_doesnt_hang_if_master_fails(self):
    """Makes sure a failed master populate doesn't hang."""
    utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])

    # create the database so vttablets start, as they are serving
    for t in [tablet_62344, tablet_62044, tablet_41983, tablet_31981]:
      t.create_db('vt_test_keyspace')

    # Start up vttablet on every tablet and wait for all of them to start.
    self._init_replicas_and_wait(
        [tablet_62344, tablet_62044, tablet_31981, tablet_41983])

    # Force the slaves to reparent. Will create the _vt.reparent_journal table.
    utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
                     tablet_62344.tablet_alias])
    utils.validate_topology(ping_tablets=True)

    # Change the schema of the _vt.reparent_journal table, so that
    # inserts into it will fail. That will make the master fail.
    tablet_62344.mquery('_vt', 'ALTER TABLE reparent_journal'
                        ' DROP COLUMN replication_position')

    # Perform a planned reparent operation, the master will fail the
    # insert.  The slaves should then abort right away. If this fails,
    # the test will timeout.
    _, stderr = utils.run_vtctl(['-wait-time', '3600s',
                                 'PlannedReparentShard',
                                 '-keyspace_shard', 'test_keyspace/0',
                                 '-new_master', tablet_62044.tablet_alias],
                                expect_fail=True)
    self.assertIn('master failed to PopulateReparentJournal',
                  stderr)

    # Clean up the tablets.
    tablet.kill_tablets([tablet_62344, tablet_62044, tablet_41983,
                         tablet_31981])
# When the file is executed directly (rather than imported), hand control to
# the shared test entry point utils.main().
if __name__ == '__main__':
  utils.main()