зеркало из https://github.com/github/vitess-gh.git
839 строки
33 KiB
Python
Executable File
839 строки
33 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# Copyright 2017 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# vim: tabstop=8 expandtab shiftwidth=2 softtabstop=2
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import time
|
|
import unittest
|
|
import urllib
|
|
import urllib2
|
|
import re
|
|
|
|
import MySQLdb
|
|
|
|
import environment
|
|
import utils
|
|
import tablet
|
|
from mysql_flavor import mysql_flavor
|
|
from protocols_flavor import protocols_flavor
|
|
|
|
from vtproto import topodata_pb2
|
|
|
|
# The two tablets shared by every test in this module, identified by uid.
tablet_62344, tablet_62044 = tablet.Tablet(62344), tablet.Tablet(62044)

# Pattern matched against the tablet status page: it succeeds whenever the
# page reports "healthy", whatever the actual replication lag is.
healthy_expr = re.compile(r'Current status: <span.+?>healthy')
|
|
|
|
|
|
def setUpModule():
  """Bring up the topo server, both mysql instances and vtctld.

  If any step raises, tearDownModule() is run before the error is re-raised
  so that a partially completed setup is cleaned up.
  """
  try:
    # The flavor is looked up but its value is not used afterwards.
    _flavor = environment.topo_server().flavor()
    environment.topo_server().setup()

    # The mysql instances are external to the test: kick off both
    # initializations first, start vtctld, and only then wait for them.
    mysql_procs = [t.init_mysql() for t in (tablet_62344, tablet_62044)]
    utils.Vtctld().start()
    utils.wait_procs(mysql_procs)
  except:
    # Deliberately bare: clean up on any failure, then propagate it.
    tearDownModule()
    raise
|
|
|
|
|
|
def tearDownModule():
  """Tear down everything setUpModule() started.

  utils.required_teardown() always runs. The rest is skipped when
  utils.options.skip_teardown is set.
  """
  utils.required_teardown()
  if not utils.options.skip_teardown:
    module_tablets = (tablet_62344, tablet_62044)

    # Stop both mysql instances; errors here do not abort the teardown.
    mysql_procs = [t.teardown_mysql() for t in module_tablets]
    utils.wait_procs(mysql_procs, raise_on_error=False)

    environment.topo_server().teardown()
    utils.kill_sub_processes()
    utils.remove_tmp_files()

    for t in module_tablets:
      t.remove_tree()
|
|
|
|
|
|
class TestTabletManager(unittest.TestCase):
|
|
|
|
def tearDown(self):
|
|
tablet.Tablet.check_vttablet_count()
|
|
environment.topo_server().wipe()
|
|
for t in [tablet_62344, tablet_62044]:
|
|
t.reset_replication()
|
|
t.set_semi_sync_enabled(master=False)
|
|
t.clean_dbs()
|
|
|
|
# run twice to check behavior with existing znode data
|
|
def test_sanity(self):
|
|
self._test_sanity()
|
|
self._test_sanity()
|
|
|
|
def _test_sanity(self):
|
|
# Start up a master mysql and vttablet
|
|
utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
|
|
utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
|
|
utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
|
|
utils.validate_topology()
|
|
|
|
# if these statements don't run before the tablet it will wedge
|
|
# waiting for the db to become accessible. this is more a bug than
|
|
# a feature.
|
|
tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
|
|
self._populate_vt_select_test)
|
|
|
|
tablet_62344.start_vttablet()
|
|
|
|
# make sure the query service is started right away.
|
|
qr = tablet_62344.execute('select id, msg from vt_select_test')
|
|
self.assertEqual(len(qr['rows']), 4,
|
|
'expected 4 rows in vt_select_test: %s' % str(qr))
|
|
self.assertEqual(qr['fields'][0]['name'], 'id')
|
|
self.assertEqual(qr['fields'][1]['name'], 'msg')
|
|
|
|
# test exclude_field_names to vttablet works as expected.
|
|
qr = tablet_62344.execute('select id, msg from vt_select_test',
|
|
execute_options='included_fields:TYPE_ONLY ')
|
|
self.assertEqual(len(qr['rows']), 4,
|
|
'expected 4 rows in vt_select_test: %s' % str(qr))
|
|
self.assertNotIn('name', qr['fields'][0])
|
|
self.assertNotIn('name', qr['fields'][1])
|
|
|
|
# make sure direct dba queries work
|
|
query_result = utils.run_vtctl_json(
|
|
['ExecuteFetchAsDba', '-json', tablet_62344.tablet_alias,
|
|
'select * from vt_test_keyspace.vt_select_test'])
|
|
self.assertEqual(
|
|
len(query_result['rows']), 4,
|
|
'expected 4 rows in vt_select_test: %s' % str(query_result))
|
|
self.assertEqual(
|
|
len(query_result['fields']), 2,
|
|
'expected 2 fields in vt_select_test: %s' % str(query_result))
|
|
|
|
# check Ping / RefreshState / RefreshStateByShard
|
|
utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
|
|
utils.run_vtctl(['RefreshState', tablet_62344.tablet_alias])
|
|
utils.run_vtctl(['RefreshStateByShard', 'test_keyspace/0'])
|
|
utils.run_vtctl(['RefreshStateByShard', '--cells=test_nj',
|
|
'test_keyspace/0'])
|
|
|
|
# Quickly check basic actions.
|
|
utils.run_vtctl(['SetReadOnly', tablet_62344.tablet_alias])
|
|
utils.wait_db_read_only(62344)
|
|
|
|
utils.run_vtctl(['SetReadWrite', tablet_62344.tablet_alias])
|
|
utils.check_db_read_write(62344)
|
|
|
|
utils.validate_topology()
|
|
utils.run_vtctl(['ValidateKeyspace', 'test_keyspace'])
|
|
# not pinging tablets, as it enables replication checks, and they
|
|
# break because we only have a single master, no slaves
|
|
utils.run_vtctl(['ValidateShard', '-ping-tablets=false',
|
|
'test_keyspace/0'])
|
|
|
|
tablet_62344.kill_vttablet()
|
|
|
|
_create_vt_select_test = '''create table vt_select_test (
|
|
id bigint auto_increment,
|
|
msg varchar(64),
|
|
primary key (id)
|
|
) Engine=InnoDB'''
|
|
|
|
_populate_vt_select_test = [
|
|
"insert into vt_select_test (msg) values ('test %s')" % x
|
|
for x in xrange(4)]
|
|
|
|
# Test if a vttablet can be pointed at an existing mysql
|
|
# We point 62044 at 62344's mysql and try to read from it.
|
|
def test_command_line(self):
|
|
utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
|
|
tablet_62044.init_tablet('master', 'test_keyspace', '0')
|
|
tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
|
|
self._populate_vt_select_test)
|
|
|
|
# mycnf_server_id prevents vttablet from reading the mycnf
|
|
extra_args = [
|
|
'-mycnf_server_id', str(tablet_62044.tablet_uid),
|
|
'-db_socket', os.path.join(tablet_62344.tablet_dir, 'mysql.sock')]
|
|
# supports_backup=False prevents vttablet from trying to restore
|
|
tablet_62044.start_vttablet(extra_args=extra_args, supports_backups=False)
|
|
qr = tablet_62044.execute('select id, msg from vt_select_test')
|
|
self.assertEqual(len(qr['rows']), 4,
|
|
'expected 4 rows in vt_select_test: %s' % str(qr))
|
|
|
|
# Verify backup fails
|
|
try:
|
|
utils.run_vtctl(['Backup', tablet_62044.tablet_alias])
|
|
except Exception as e:
|
|
self.assertIn('cannot perform backup without my.cnf', str(e))
|
|
else:
|
|
self.assertFail('did not get an exception')
|
|
|
|
tablet_62044.kill_vttablet()
|
|
|
|
def test_actions_and_timeouts(self):
|
|
# Start up a master mysql and vttablet
|
|
utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
|
|
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0')
|
|
utils.validate_topology()
|
|
tablet_62344.create_db('vt_test_keyspace')
|
|
tablet_62344.start_vttablet()
|
|
|
|
utils.run_vtctl(['Ping', tablet_62344.tablet_alias])
|
|
|
|
# schedule long action in the background, sleep a little bit to make sure
|
|
# it started to run
|
|
args = (environment.binary_args('vtctl') +
|
|
environment.topo_server().flags() +
|
|
['-tablet_manager_protocol',
|
|
protocols_flavor().tablet_manager_protocol(),
|
|
'-tablet_protocol', protocols_flavor().tabletconn_protocol(),
|
|
'-log_dir', environment.vtlogroot,
|
|
'Sleep', tablet_62344.tablet_alias, '10s'])
|
|
bg = utils.run_bg(args)
|
|
time.sleep(3)
|
|
|
|
# try a frontend RefreshState that should timeout as the tablet is busy
|
|
# running the other one
|
|
_, stderr = utils.run_vtctl(
|
|
['-wait-time', '3s', 'RefreshState', tablet_62344.tablet_alias],
|
|
expect_fail=True)
|
|
self.assertIn(protocols_flavor().rpc_timeout_message(), stderr)
|
|
|
|
# wait for the background vtctl
|
|
bg.wait()
|
|
|
|
tablet_62344.kill_vttablet()
|
|
|
|
def _run_hook(self, params, expected_status, expected_stdout,
|
|
expected_stderr):
|
|
hr = utils.run_vtctl_json(['ExecuteHook', tablet_62344.tablet_alias] +
|
|
params)
|
|
self.assertEqual(hr['ExitStatus'], expected_status)
|
|
if isinstance(expected_stdout, basestring):
|
|
self.assertEqual(hr['Stdout'], expected_stdout)
|
|
else:
|
|
found = False
|
|
for exp in expected_stdout:
|
|
if hr['Stdout'] == exp:
|
|
found = True
|
|
break
|
|
if not found:
|
|
self.assertFail(
|
|
'cannot find expected %s in %s' %
|
|
(str(expected_stdout), hr['Stdout']))
|
|
if expected_stderr[-1:] == '%':
|
|
self.assertEqual(
|
|
hr['Stderr'][:len(expected_stderr)-1],
|
|
expected_stderr[:len(expected_stderr)-1])
|
|
else:
|
|
self.assertEqual(hr['Stderr'], expected_stderr)
|
|
|
|
def test_hook(self):
|
|
utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
|
|
|
|
# create the database so vttablets start, as it is serving
|
|
tablet_62344.create_db('vt_test_keyspace')
|
|
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0', start=True)
|
|
|
|
# test a regular program works
|
|
self._run_hook(['test.sh', '--flag1', '--param1=hello'], 0,
|
|
['TABLET_ALIAS: test_nj-0000062344\n'
|
|
'PARAM: --flag1\n'
|
|
'PARAM: --param1=hello\n',
|
|
'TABLET_ALIAS: test_nj-0000062344\n'
|
|
'PARAM: --param1=hello\n'
|
|
'PARAM: --flag1\n'],
|
|
'')
|
|
|
|
# test stderr output
|
|
self._run_hook(['test.sh', '--to-stderr'], 0,
|
|
'TABLET_ALIAS: test_nj-0000062344\n'
|
|
'PARAM: --to-stderr\n',
|
|
'ERR: --to-stderr\n')
|
|
|
|
# test commands that fail
|
|
self._run_hook(['test.sh', '--exit-error'], 1,
|
|
'TABLET_ALIAS: test_nj-0000062344\n'
|
|
'PARAM: --exit-error\n',
|
|
'ERROR: exit status 1\n')
|
|
|
|
# test hook that is not present
|
|
self._run_hook(['not_here.sh'], -1,
|
|
'',
|
|
'missing hook /%') # cannot go further, local path
|
|
|
|
# test hook with invalid name
|
|
_, err = utils.run_vtctl(['--alsologtostderr', 'ExecuteHook',
|
|
tablet_62344.tablet_alias,
|
|
'/bin/ls'],
|
|
mode=utils.VTCTL_VTCTL, trap_output=True,
|
|
raise_on_error=False)
|
|
expected = "action failed: ExecuteHook hook name cannot have a '/' in it"
|
|
self.assertIn(expected, err)
|
|
|
|
tablet_62344.kill_vttablet()
|
|
|
|
def test_shard_replication_fix(self):
|
|
utils.run_vtctl(['CreateKeyspace', 'test_keyspace'])
|
|
|
|
tablet_62344.create_db('vt_test_keyspace')
|
|
tablet_62044.create_db('vt_test_keyspace')
|
|
|
|
# one master one replica
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0')
|
|
tablet_62044.init_tablet('replica', 'test_keyspace', '0')
|
|
|
|
# make sure the replica is in the replication graph
|
|
before_bogus = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
|
|
'test_keyspace/0'])
|
|
self.assertEqual(2, len(before_bogus['nodes']),
|
|
'wrong shard replication nodes before: %s' %
|
|
str(before_bogus))
|
|
|
|
# manually add a bogus entry to the replication graph, and check
|
|
# it is removed by ShardReplicationFix
|
|
utils.run_vtctl(['ShardReplicationAdd', 'test_keyspace/0',
|
|
'test_nj-0000066666'], auto_log=True)
|
|
with_bogus = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
|
|
'test_keyspace/0'])
|
|
self.assertEqual(3, len(with_bogus['nodes']),
|
|
'wrong shard replication nodes with bogus: %s' %
|
|
str(with_bogus))
|
|
utils.run_vtctl(['ShardReplicationFix', 'test_nj', 'test_keyspace/0'],
|
|
auto_log=True)
|
|
after_fix = utils.run_vtctl_json(['GetShardReplication', 'test_nj',
|
|
'test_keyspace/0'])
|
|
self.assertEqual(2, len(after_fix['nodes']),
|
|
'wrong shard replication nodes after fix: %s' %
|
|
str(after_fix))
|
|
|
|
def check_healthz(self, t, expected):
|
|
if expected:
|
|
self.assertEqual('ok\n', t.get_healthz())
|
|
else:
|
|
with self.assertRaises(urllib2.HTTPError):
|
|
t.get_healthz()
|
|
|
|
def test_health_check(self):
|
|
# one master, one replica that starts not initialized
|
|
# (for the replica, we let vttablet do the InitTablet)
|
|
tablet_62344.init_tablet('replica', 'test_keyspace', '0')
|
|
|
|
for t in tablet_62344, tablet_62044:
|
|
t.create_db('vt_test_keyspace')
|
|
|
|
tablet_62344.start_vttablet(wait_for_state=None)
|
|
tablet_62044.start_vttablet(wait_for_state=None,
|
|
lameduck_period='5s',
|
|
init_tablet_type='replica',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0')
|
|
|
|
tablet_62344.wait_for_vttablet_state('NOT_SERVING')
|
|
tablet_62044.wait_for_vttablet_state('NOT_SERVING')
|
|
self.check_healthz(tablet_62044, False)
|
|
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
tablet_62344.tablet_alias])
|
|
|
|
# make sure the unhealthy slave goes to healthy
|
|
tablet_62044.wait_for_vttablet_state('SERVING')
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
self.check_healthz(tablet_62044, True)
|
|
|
|
# make sure the master is still master
|
|
ti = utils.run_vtctl_json(['GetTablet', tablet_62344.tablet_alias])
|
|
self.assertEqual(ti['type'], topodata_pb2.MASTER,
|
|
'unexpected master type: %s' % ti['type'])
|
|
|
|
# stop replication at the mysql level.
|
|
tablet_62044.mquery('', 'stop slave')
|
|
# vttablet replication_reporter should restart it.
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
# insert something on the master and wait for it on the slave.
|
|
tablet_62344.mquery('vt_test_keyspace', [
|
|
'create table repl_test_table (id int)',
|
|
'insert into repl_test_table values (123)'], write=True)
|
|
timeout = 10.0
|
|
while True:
|
|
try:
|
|
result = tablet_62044.mquery('vt_test_keyspace',
|
|
'select * from repl_test_table')
|
|
if result:
|
|
self.assertEqual(result[0][0], 123L)
|
|
break
|
|
except MySQLdb.ProgrammingError:
|
|
# Maybe the create table hasn't gone trough yet, we wait more
|
|
logging.exception('got this exception waiting for data, ignoring it')
|
|
timeout = utils.wait_step(
|
|
'slave replication repaired by replication_reporter', timeout)
|
|
|
|
# stop replication, make sure we don't go unhealthy.
|
|
# (we have a baseline as well, so the time should be good).
|
|
utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
self.check_healthz(tablet_62044, True)
|
|
|
|
# make sure status web page is healthy
|
|
self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)
|
|
|
|
# make sure the health stream is updated
|
|
health = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
tablet_62044.tablet_alias])
|
|
self.assertTrue(('seconds_behind_master' not in health['realtime_stats']) or
|
|
(health['realtime_stats']['seconds_behind_master'] < 30),
|
|
'got unexpected health: %s' % str(health))
|
|
self.assertIn('serving', health)
|
|
|
|
# then restart replication, make sure we stay healthy
|
|
utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
|
|
# make sure status web page is healthy
|
|
self.assertRegexpMatches(tablet_62044.get_status(), healthy_expr)
|
|
|
|
# now test VtTabletStreamHealth returns the right thing
|
|
stdout, _ = utils.run_vtctl(['VtTabletStreamHealth',
|
|
'-count', '2',
|
|
tablet_62044.tablet_alias],
|
|
trap_output=True, auto_log=True)
|
|
lines = stdout.splitlines()
|
|
self.assertEqual(len(lines), 2)
|
|
for line in lines:
|
|
logging.debug('Got health: %s', line)
|
|
data = json.loads(line)
|
|
self.assertIn('realtime_stats', data)
|
|
self.assertIn('serving', data)
|
|
self.assertTrue(data['serving'])
|
|
self.assertNotIn('health_error', data['realtime_stats'])
|
|
self.assertNotIn('tablet_externally_reparented_timestamp', data)
|
|
self.assertEqual('test_keyspace', data['target']['keyspace'])
|
|
self.assertEqual('0', data['target']['shard'])
|
|
self.assertEqual(topodata_pb2.REPLICA, data['target']['tablet_type'])
|
|
|
|
# Test that VtTabletStreamHealth reports a QPS >0.0.
|
|
# Therefore, issue several reads first.
|
|
# NOTE: This may be potentially flaky because we'll observe a QPS >0.0
|
|
# exactly "once" for the duration of one sampling interval (5s) and
|
|
# after that we'll see 0.0 QPS rates again. If this becomes actually
|
|
# flaky, we need to read continuously in a separate thread.
|
|
for _ in range(10):
|
|
tablet_62044.execute('select 1 from dual')
|
|
# This may take up to 5 seconds to become true because we sample the query
|
|
# counts for the rates only every 5 seconds (see query_service_stats.go).
|
|
timeout = 10
|
|
while True:
|
|
health = utils.run_vtctl_json(['VtTabletStreamHealth', '-count', '1',
|
|
tablet_62044.tablet_alias])
|
|
if health['realtime_stats'].get('qps', 0.0) > 0.0:
|
|
break
|
|
timeout = utils.wait_step('QPS >0.0 seen', timeout)
|
|
|
|
# kill the tablets
|
|
tablet.kill_tablets([tablet_62344, tablet_62044])
|
|
|
|
def test_health_check_drained_state_does_not_shutdown_query_service(self):
|
|
# This test is similar to test_health_check, but has the following
|
|
# differences:
|
|
# - the second tablet is an 'rdonly' and not a 'replica'
|
|
# - the second tablet will be set to 'drained' and we expect that
|
|
# the query service won't be shutdown
|
|
|
|
# Setup master and rdonly tablets.
|
|
tablet_62344.init_tablet('replica', 'test_keyspace', '0')
|
|
|
|
for t in tablet_62344, tablet_62044:
|
|
t.create_db('vt_test_keyspace')
|
|
|
|
# Note we only have a master and a rdonly. So we can't enable
|
|
# semi-sync in this case, as the rdonly slaves don't semi-sync ack.
|
|
tablet_62344.start_vttablet(wait_for_state=None, enable_semi_sync=False)
|
|
tablet_62044.start_vttablet(wait_for_state=None,
|
|
init_tablet_type='rdonly',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0',
|
|
enable_semi_sync=False)
|
|
|
|
tablet_62344.wait_for_vttablet_state('NOT_SERVING')
|
|
tablet_62044.wait_for_vttablet_state('NOT_SERVING')
|
|
self.check_healthz(tablet_62044, False)
|
|
|
|
# Enable replication.
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
tablet_62344.tablet_alias])
|
|
|
|
# Trigger healthcheck to save time waiting for the next interval.
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
tablet_62044.wait_for_vttablet_state('SERVING')
|
|
self.check_healthz(tablet_62044, True)
|
|
|
|
# Change from rdonly to drained and stop replication. (These
|
|
# actions are similar to the SplitClone vtworker command
|
|
# implementation.) The tablet will stay healthy, and the
|
|
# query service is still running.
|
|
utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
|
|
# Trying to drain the same tablet again, should error
|
|
try:
|
|
utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'drained'])
|
|
except Exception as e:
|
|
s = str(e)
|
|
self.assertIn("already drained", s)
|
|
utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias])
|
|
# Trigger healthcheck explicitly to avoid waiting for the next interval.
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'drained')
|
|
self.check_healthz(tablet_62044, True)
|
|
# Query service is still running.
|
|
tablet_62044.wait_for_vttablet_state('SERVING')
|
|
|
|
# Restart replication. Tablet will become healthy again.
|
|
utils.run_vtctl(['ChangeSlaveType', tablet_62044.tablet_alias, 'rdonly'])
|
|
utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias])
|
|
self.check_healthz(tablet_62044, True)
|
|
|
|
# kill the tablets
|
|
tablet.kill_tablets([tablet_62344, tablet_62044])
|
|
|
|
def test_no_mysql_healthcheck(self):
|
|
"""This test starts a vttablet with no mysql port, while mysql is down.
|
|
|
|
It makes sure vttablet will start properly and be unhealthy.
|
|
Then we start mysql, and make sure vttablet becomes healthy.
|
|
"""
|
|
# we need replication to be enabled, so the slave tablet can be healthy.
|
|
for t in tablet_62344, tablet_62044:
|
|
t.create_db('vt_test_keyspace')
|
|
pos = mysql_flavor().master_position(tablet_62344)
|
|
# Use 'localhost' as hostname because Travis CI worker hostnames
|
|
# are too long for MySQL replication.
|
|
change_master_cmds = mysql_flavor().change_master_commands(
|
|
'localhost',
|
|
tablet_62344.mysql_port,
|
|
pos)
|
|
tablet_62044.mquery('', ['RESET MASTER', 'RESET SLAVE'] +
|
|
change_master_cmds + ['START SLAVE'])
|
|
|
|
# now shutdown all mysqld
|
|
shutdown_procs = [
|
|
tablet_62344.shutdown_mysql(),
|
|
tablet_62044.shutdown_mysql(),
|
|
]
|
|
utils.wait_procs(shutdown_procs)
|
|
|
|
# start the tablets, wait for them to be NOT_SERVING (mysqld not there)
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0')
|
|
tablet_62044.init_tablet('replica', 'test_keyspace', '0',
|
|
include_mysql_port=False)
|
|
for t in tablet_62344, tablet_62044:
|
|
# Since MySQL is down at this point and we want the tablet to start up
|
|
# successfully, we have to use supports_backups=False.
|
|
t.start_vttablet(wait_for_state=None, supports_backups=False,
|
|
full_mycnf_args=True, include_mysql_port=False)
|
|
for t in tablet_62344, tablet_62044:
|
|
t.wait_for_vttablet_state('NOT_SERVING')
|
|
self.check_healthz(t, False)
|
|
|
|
# Tell slave to not try to repair replication in healthcheck.
|
|
# The StopSlave will ultimately fail because mysqld is not running,
|
|
# But vttablet should remember that it's not supposed to fix replication.
|
|
utils.run_vtctl(['StopSlave', tablet_62044.tablet_alias], expect_fail=True)
|
|
|
|
# The above notice to not fix replication should survive tablet restart.
|
|
tablet_62044.kill_vttablet()
|
|
tablet_62044.start_vttablet(wait_for_state='NOT_SERVING',
|
|
full_mycnf_args=True, include_mysql_port=False,
|
|
supports_backups=False)
|
|
|
|
# restart mysqld
|
|
start_procs = [
|
|
tablet_62344.start_mysql(),
|
|
tablet_62044.start_mysql(),
|
|
]
|
|
utils.wait_procs(start_procs)
|
|
|
|
# the master should still be healthy
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
|
|
auto_log=True)
|
|
self.check_healthz(tablet_62344, True)
|
|
|
|
# the slave will now be healthy, but report a very high replication
|
|
# lag, because it can't figure out what it exactly is.
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
|
|
auto_log=True)
|
|
tablet_62044.wait_for_vttablet_state('SERVING')
|
|
self.check_healthz(tablet_62044, True)
|
|
|
|
health = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
tablet_62044.tablet_alias])
|
|
self.assertIn('seconds_behind_master', health['realtime_stats'])
|
|
self.assertEqual(health['realtime_stats']['seconds_behind_master'], 7200)
|
|
self.assertIn('serving', health)
|
|
|
|
# restart replication, wait until health check goes small
|
|
# (a value of zero is default and won't be in structure)
|
|
utils.run_vtctl(['StartSlave', tablet_62044.tablet_alias])
|
|
timeout = 10
|
|
while True:
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62044.tablet_alias],
|
|
auto_log=True)
|
|
health = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
tablet_62044.tablet_alias])
|
|
if 'serving' in health and (
|
|
('seconds_behind_master' not in health['realtime_stats']) or
|
|
(health['realtime_stats']['seconds_behind_master'] < 30)):
|
|
break
|
|
timeout = utils.wait_step('health delay goes back down', timeout)
|
|
|
|
# wait for the tablet to fix its mysql port
|
|
for t in tablet_62344, tablet_62044:
|
|
# wait for mysql port to show up
|
|
timeout = 10
|
|
while True:
|
|
ti = utils.run_vtctl_json(['GetTablet', t.tablet_alias])
|
|
if 'mysql' in ti['port_map']:
|
|
break
|
|
timeout = utils.wait_step('mysql port in tablet record', timeout)
|
|
self.assertEqual(ti['port_map']['mysql'], t.mysql_port)
|
|
|
|
# all done
|
|
tablet.kill_tablets([tablet_62344, tablet_62044])
|
|
|
|
def test_repeated_init_shard_master(self):
|
|
"""Test that using InitShardMaster can go back and forth between 2 hosts."""
|
|
for t in tablet_62344, tablet_62044:
|
|
t.create_db('vt_test_keyspace')
|
|
t.start_vttablet(wait_for_state=None,
|
|
lameduck_period='5s',
|
|
init_tablet_type='replica',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0')
|
|
|
|
# Tablets are not replicating, so they won't be healthy.
|
|
for t in tablet_62344, tablet_62044:
|
|
t.wait_for_vttablet_state('NOT_SERVING')
|
|
self.check_healthz(t, False)
|
|
|
|
# Pick one master out of the two.
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
tablet_62344.tablet_alias])
|
|
|
|
# Run health check on both, make sure they are both healthy.
|
|
# Also make sure the types are correct.
|
|
for t in tablet_62344, tablet_62044:
|
|
utils.run_vtctl(['RunHealthCheck', t.tablet_alias], auto_log=True)
|
|
self.check_healthz(t, True)
|
|
utils.wait_for_tablet_type(tablet_62344.tablet_alias, 'master', timeout=0)
|
|
utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'replica', timeout=0)
|
|
|
|
# Pick the other one as master, make sure they are still healthy.
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
tablet_62044.tablet_alias])
|
|
|
|
# Run health check on both, make sure they are both healthy.
|
|
# Also make sure the types are correct.
|
|
for t in tablet_62344, tablet_62044:
|
|
utils.run_vtctl(['RunHealthCheck', t.tablet_alias], auto_log=True)
|
|
self.check_healthz(t, True)
|
|
utils.wait_for_tablet_type(tablet_62344.tablet_alias, 'replica', timeout=0)
|
|
utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'master', timeout=0)
|
|
|
|
# Come back to the original guy.
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
tablet_62344.tablet_alias])
|
|
|
|
# Run health check on both, make sure they are both healthy.
|
|
# Also make sure the types are correct.
|
|
for t in tablet_62344, tablet_62044:
|
|
utils.run_vtctl(['RunHealthCheck', t.tablet_alias], auto_log=True)
|
|
self.check_healthz(t, True)
|
|
utils.wait_for_tablet_type(tablet_62344.tablet_alias, 'master', timeout=0)
|
|
utils.wait_for_tablet_type(tablet_62044.tablet_alias, 'replica', timeout=0)
|
|
|
|
# And done.
|
|
tablet.kill_tablets([tablet_62344, tablet_62044])
|
|
|
|
def test_fallback_policy(self):
|
|
tablet_62344.create_db('vt_test_keyspace')
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0')
|
|
tablet_62344.start_vttablet(security_policy='bogus')
|
|
f = urllib.urlopen('http://localhost:%d/queryz' % int(tablet_62344.port))
|
|
response = f.read()
|
|
f.close()
|
|
self.assertIn('not allowed', response)
|
|
tablet_62344.kill_vttablet()
|
|
|
|
def test_ignore_health_error(self):
|
|
tablet_62344.create_db('vt_test_keyspace')
|
|
|
|
# Starts unhealthy because of "no slave status" (not replicating).
|
|
tablet_62344.start_vttablet(wait_for_state='NOT_SERVING',
|
|
init_tablet_type='replica',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0')
|
|
|
|
# Force it healthy.
|
|
utils.run_vtctl(['IgnoreHealthError', tablet_62344.tablet_alias,
|
|
'.*no slave status.*'])
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
|
|
auto_log=True)
|
|
tablet_62344.wait_for_vttablet_state('SERVING')
|
|
self.check_healthz(tablet_62344, True)
|
|
|
|
# Turn off the force-healthy.
|
|
utils.run_vtctl(['IgnoreHealthError', tablet_62344.tablet_alias, ''])
|
|
utils.run_vtctl(['RunHealthCheck', tablet_62344.tablet_alias],
|
|
auto_log=True)
|
|
tablet_62344.wait_for_vttablet_state('NOT_SERVING')
|
|
self.check_healthz(tablet_62344, False)
|
|
|
|
tablet_62344.kill_vttablet()
|
|
|
|
def test_master_restart_sets_ter_timestamp(self):
|
|
"""Test that TER timestamp is set when we restart the MASTER vttablet.
|
|
|
|
TER = TabletExternallyReparented.
|
|
See StreamHealthResponse.tablet_externally_reparented_timestamp for details.
|
|
"""
|
|
master, replica = tablet_62344, tablet_62044
|
|
tablets = [master, replica]
|
|
# Start vttablets. Our future master is initially a REPLICA.
|
|
for t in tablets:
|
|
t.create_db('vt_test_keyspace')
|
|
for t in tablets:
|
|
t.start_vttablet(wait_for_state='NOT_SERVING',
|
|
init_tablet_type='replica',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0')
|
|
|
|
# Initialize tablet as MASTER.
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/0',
|
|
master.tablet_alias])
|
|
master.wait_for_vttablet_state('SERVING')
|
|
|
|
# Capture the current TER.
|
|
health = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
master.tablet_alias])
|
|
self.assertEqual(topodata_pb2.MASTER, health['target']['tablet_type'])
|
|
self.assertIn('tablet_externally_reparented_timestamp', health)
|
|
self.assertGreater(health['tablet_externally_reparented_timestamp'], 0,
|
|
'TER on MASTER must be set after InitShardMaster')
|
|
|
|
# Restart the MASTER vttablet.
|
|
master.kill_vttablet()
|
|
master.start_vttablet(wait_for_state='SERVING',
|
|
init_tablet_type='replica',
|
|
init_keyspace='test_keyspace',
|
|
init_shard='0')
|
|
|
|
# Make sure that the TER increased i.e. it was set to the current time.
|
|
health_after_restart = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
master.tablet_alias])
|
|
self.assertEqual(topodata_pb2.MASTER,
|
|
health_after_restart['target']['tablet_type'])
|
|
self.assertIn('tablet_externally_reparented_timestamp',
|
|
health_after_restart)
|
|
self.assertGreater(
|
|
health_after_restart['tablet_externally_reparented_timestamp'],
|
|
health['tablet_externally_reparented_timestamp'],
|
|
'When the MASTER vttablet was restarted, the TER timestamp must be set'
|
|
' to the current time.')
|
|
|
|
# Shutdown.
|
|
for t in tablets:
|
|
t.kill_vttablet()
|
|
|
|
def test_topocustomrule(self):
|
|
# Empty rule file.
|
|
topocustomrule_file = environment.tmproot+'/rules.json'
|
|
with open(topocustomrule_file, 'w') as fd:
|
|
fd.write('[]\n')
|
|
|
|
# Start up a master mysql and vttablet
|
|
utils.run_vtctl(['CreateKeyspace', '-force', 'test_keyspace'])
|
|
utils.run_vtctl(['createshard', '-force', 'test_keyspace/0'])
|
|
tablet_62344.init_tablet('master', 'test_keyspace', '0', parent=False)
|
|
utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'])
|
|
utils.validate_topology()
|
|
|
|
# Copy config file into topo.
|
|
topocustomrule_path = '/keyspaces/test_keyspace/configs/CustomRules'
|
|
utils.run_vtctl(['TopoCp', '-to_topo', topocustomrule_file,
|
|
topocustomrule_path])
|
|
|
|
# Put some data in, start master.
|
|
tablet_62344.populate('vt_test_keyspace', self._create_vt_select_test,
|
|
self._populate_vt_select_test)
|
|
tablet_62344.start_vttablet(topocustomrule_path=topocustomrule_path)
|
|
|
|
# make sure the query service is working
|
|
qr = tablet_62344.execute('select id, msg from vt_select_test')
|
|
self.assertEqual(len(qr['rows']), 4,
|
|
'expected 4 rows in vt_select_test: %s' % str(qr))
|
|
|
|
# Now update the topocustomrule file.
|
|
with open(topocustomrule_file, 'w') as fd:
|
|
fd.write('''
|
|
[{
|
|
"Name": "rule1",
|
|
"Description": "disallow select on table vt_select_test",
|
|
"TableNames" : ["vt_select_test"],
|
|
"Query" : "(select)|(SELECT)"
|
|
}]''')
|
|
utils.run_vtctl(['TopoCp', '-to_topo', topocustomrule_file,
|
|
topocustomrule_path])
|
|
|
|
# And wait until the query fails with the right error.
|
|
timeout = 10.0
|
|
while True:
|
|
try:
|
|
tablet_62344.execute('select id, msg from vt_select_test')
|
|
timeout = utils.wait_step('query rule in place', timeout)
|
|
except Exception as e:
|
|
print e
|
|
expected = ('disallowed due to rule: disallow select'
|
|
' on table vt_select_test')
|
|
self.assertIn(expected, str(e))
|
|
break
|
|
|
|
# Cleanup.
|
|
tablet_62344.kill_vttablet()
|
|
|
|
|
|
# Script entry point: hand control to the shared test runner in utils.
if __name__ == '__main__':
  utils.main()
|