#!/usr/bin/env python
#
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This test covers a resharding scenario of an already sharded keyspace.

We start with shards -80 and 80-. We then split 80- into 80-c0 and c0-.

This test is the main resharding test. It tests not only the regular
resharding workflow for a horizontal split, but also a lot of error cases and
side effects, such as:
- migrating the traffic one cell at a time.
- migrating rdonly traffic back and forth.
- making sure we can't migrate the master until replica and rdonly are
  migrated.
- inserting data from a background thread during the migration.
- testing a destination shard master failover while replication is running.
- testing a filtered replication source replacement while filtered
  replication is running.
- testing 'vtctl SourceShardAdd' and 'vtctl SourceShardDelete'.
- making sure the key range rules are properly enforced on masters.
"""

import threading
import time

import logging
import unittest

import base_sharding
import environment
import tablet
import utils

from vtproto import topodata_pb2
from vtdb import keyrange_constants

# initial shards
# range '' - 80
shard_0_master = tablet.Tablet()
shard_0_replica = tablet.Tablet()
shard_0_ny_rdonly = tablet.Tablet(cell='ny')
# range 80 - ''
shard_1_master = tablet.Tablet()
shard_1_slave1 = tablet.Tablet()
shard_1_slave2 = tablet.Tablet()
shard_1_ny_rdonly = tablet.Tablet(cell='ny')
shard_1_rdonly1 = tablet.Tablet()

# split shards
# range 80 - c0
shard_2_master = tablet.Tablet()
shard_2_replica1 = tablet.Tablet()
shard_2_replica2 = tablet.Tablet()
shard_2_rdonly1 = tablet.Tablet()
# range c0 - ''
shard_3_master = tablet.Tablet()
shard_3_replica = tablet.Tablet()
shard_3_rdonly1 = tablet.Tablet()

shard_2_tablets = [shard_2_master, shard_2_replica1, shard_2_replica2,
                   shard_2_rdonly1]
shard_3_tablets = [shard_3_master, shard_3_replica, shard_3_rdonly1]
all_tablets = ([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
                shard_1_master, shard_1_slave1, shard_1_slave2,
                shard_1_ny_rdonly, shard_1_rdonly1] +
               shard_2_tablets + shard_3_tablets)


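# The helper below is not part of the original test; it is a minimal
# illustrative sketch of how the keyspace ids used throughout this file map
# onto the destination shards of the 80- split. The function name and the
# plain uint64 comparison are assumptions for illustration, not Vitess APIs.
def _example_destination_shard(keyspace_id):
  """Returns the destination shard of the 80- split for a uint64 keyspace_id."""
  assert 0x8000000000000000 <= keyspace_id <= 0xFFFFFFFFFFFFFFFF
  if keyspace_id < 0xC000000000000000:
    return '80-c0'  # e.g. the 0x9000... and 0xA000... rows in this test
  return 'c0-'      # e.g. the 0xD000... and 0xE000... rows in this test

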
def setUpModule():
  try:
    environment.topo_server().setup()
    setup_procs = [t.init_mysql(use_rbr=base_sharding.use_rbr)
                   for t in all_tablets]
    utils.Vtctld().start()
    utils.wait_procs(setup_procs)
  except:
    tearDownModule()
    raise


def tearDownModule():
  utils.required_teardown()
  if utils.options.skip_teardown:
    return

  teardown_procs = [t.teardown_mysql() for t in all_tablets]
  utils.wait_procs(teardown_procs, raise_on_error=False)
  environment.topo_server().teardown()
  utils.kill_sub_processes()
  utils.remove_tmp_files()
  for t in all_tablets:
    t.remove_tree()


# InsertThread will insert a value into the timestamps table, and then
# every 1/5s will update its value with the current timestamp
class InsertThread(threading.Thread):

  def __init__(self, tablet_obj, thread_name, thread_id, user_id,
               keyspace_id):
    threading.Thread.__init__(self)
    self.tablet = tablet_obj
    self.thread_name = thread_name
    self.thread_id = thread_id
    self.user_id = user_id
    self.keyspace_id = keyspace_id
    self.str_keyspace_id = utils.uint64_to_hex(keyspace_id)
    self.done = False

    self.tablet.mquery(
        'vt_test_keyspace',
        ['begin',
         'insert into timestamps(id, time_milli, custom_ksid_col) '
         'values(%d, %d, 0x%x) '
         '/* vtgate:: keyspace_id:%s */ /* user_id:%d */' %
         (self.thread_id, long(time.time() * 1000), self.keyspace_id,
          self.str_keyspace_id, self.user_id),
         'commit'],
        write=True, user='vt_app')
    self.start()

  def run(self):
    try:
      while not self.done:
        self.tablet.mquery(
            'vt_test_keyspace',
            ['begin',
             'update timestamps set time_milli=%d '
             'where id=%d /* vtgate:: keyspace_id:%s */ /* user_id:%d */' %
             (long(time.time() * 1000), self.thread_id,
              self.str_keyspace_id, self.user_id),
             'commit'],
            write=True, user='vt_app')
        time.sleep(0.2)
    except Exception:  # pylint: disable=broad-except
      logging.exception('InsertThread got exception.')


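# Illustrative sketch (not part of the original test): for thread_id=1,
# user_id=10000 and keyspace_id=0x9000000000000000, the statement built by
# InsertThread above looks roughly like the following. The exact hex string
# comes from utils.uint64_to_hex(); the 16-digit form below is an assumption.
#
#   insert into timestamps(id, time_milli, custom_ksid_col)
#   values(1, 1500000000000, 0x9000000000000000)
#   /* vtgate:: keyspace_id:9000000000000000 */ /* user_id:10000 */

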
# MonitorLagThread will get values from a database, and compare the timestamp
# to evaluate lag. Since the qps is really low, and we send binlogs as chunks,
# the latency is pretty high (a few seconds).
class MonitorLagThread(threading.Thread):

  def __init__(self, tablet_obj, thread_name, thread_id):
    threading.Thread.__init__(self)
    self.tablet = tablet_obj
    self.thread_name = thread_name
    self.thread_id = thread_id
    self.done = False
    self.max_lag_ms = 0
    self.lag_sum_ms = 0
    self.sample_count = 0
    self.start()

  def run(self):
    try:
      while not self.done:
        result = self.tablet.mquery(
            'vt_test_keyspace',
            'select time_milli from timestamps where id=%d' %
            self.thread_id)
        if result:
          lag_ms = long(time.time() * 1000) - long(result[0][0])
          logging.debug('MonitorLagThread(%s) got %d ms',
                        self.thread_name, lag_ms)
          self.sample_count += 1
          self.lag_sum_ms += lag_ms
          if lag_ms > self.max_lag_ms:
            self.max_lag_ms = lag_ms
        time.sleep(5.0)
    except Exception:  # pylint: disable=broad-except
      logging.exception('MonitorLagThread got exception.')


class TestResharding(unittest.TestCase, base_sharding.BaseShardingTest):

  # _create_schema creates the same schema on all shards of the keyspace.
  # (The initial rows are inserted separately by _insert_startup_values.)
def _create_schema(self):
|
|
if base_sharding.keyspace_id_type == keyrange_constants.KIT_BYTES:
|
|
t = 'varbinary(64)'
|
|
else:
|
|
t = 'bigint(20) unsigned'
|
|
# Note that the primary key columns are not defined first on purpose to test
|
|
# that a reordered column list is correctly used everywhere in vtworker.
|
|
create_table_template = '''create table %s(
|
|
custom_ksid_col ''' + t + ''' not null,
|
|
msg varchar(64),
|
|
id bigint not null,
|
|
parent_id bigint not null,
|
|
primary key (parent_id, id),
|
|
index by_msg (msg)
|
|
) Engine=InnoDB'''
|
|
create_table_bindata_template = '''create table %s(
|
|
custom_ksid_col ''' + t + ''' not null,
|
|
id bigint not null,
|
|
parent_id bigint not null,
|
|
msg bit(8),
|
|
primary key (parent_id, id),
|
|
index by_msg (msg)
|
|
) Engine=InnoDB'''
|
|
create_view_template = (
|
|
'create view %s'
|
|
'(parent_id, id, msg, custom_ksid_col)'
|
|
'as select parent_id, id, msg, custom_ksid_col '
|
|
'from %s')
|
|
create_timestamp_table = '''create table timestamps(
|
|
id int not null,
|
|
time_milli bigint(20) unsigned not null,
|
|
custom_ksid_col ''' + t + ''' not null,
|
|
primary key (id)
|
|
) Engine=InnoDB'''
|
|
# Make sure that clone and diff work with tables which have no primary key.
|
|
# RBR only because Vitess requires the primary key for query rewrites if
|
|
# it is running with statement based replication.
|
|
create_no_pk_table = '''create table no_pk(
|
|
custom_ksid_col ''' + t + ''' not null,
|
|
msg varchar(64),
|
|
id bigint not null,
|
|
parent_id bigint not null
|
|
) Engine=InnoDB'''
|
|
create_unrelated_table = '''create table unrelated(
|
|
name varchar(64),
|
|
primary key (name)
|
|
) Engine=InnoDB'''
|
|
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_table_template % ('resharding1'),
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_table_template % ('resharding2'),
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_table_bindata_template % ('resharding3'),
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_view_template % ('view1', 'resharding1'),
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_timestamp_table,
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ApplySchema',
|
|
'-sql=' + create_unrelated_table,
|
|
'test_keyspace'],
|
|
auto_log=True)
|
|
if base_sharding.use_rbr:
|
|
utils.run_vtctl(['ApplySchema', '-sql=' + create_no_pk_table,
|
|
'test_keyspace'], auto_log=True)
|
|
|
|
def _insert_startup_values(self):
|
|
self._insert_value(shard_0_master, 'resharding1', 1, 'msg1',
|
|
0x1000000000000000)
|
|
self._insert_value(shard_1_master, 'resharding1', 2, 'msg2',
|
|
0x9000000000000000)
|
|
self._insert_value(shard_1_master, 'resharding1', 3, 'msg3',
|
|
0xD000000000000000)
|
|
self._insert_value(shard_0_master, 'resharding3', 1, 'a',
|
|
0x1000000000000000)
|
|
self._insert_value(shard_1_master, 'resharding3', 2, 'b',
|
|
0x9000000000000000)
|
|
self._insert_value(shard_1_master, 'resharding3', 3, 'c',
|
|
0xD000000000000000)
|
|
if base_sharding.use_rbr:
|
|
self._insert_value(shard_1_master, 'no_pk', 1, 'msg1',
|
|
0xA000000000000000)
|
|
      # TODO(github.com/vitessio/vitess/issues/2880): Add more rows here such
      # that clone and diff would break when the insertion order on source and
      # dest shards is different.
|
|
|
|
def _check_startup_values(self):
|
|
# check first value is in the right shard
|
|
for t in shard_2_tablets:
|
|
self._check_value(t, 'resharding1', 2, 'msg2', 0x9000000000000000)
|
|
self._check_value(t, 'resharding3', 2, 'b', 0x9000000000000000)
|
|
for t in shard_3_tablets:
|
|
self._check_value(t, 'resharding1', 2, 'msg2', 0x9000000000000000,
|
|
should_be_here=False)
|
|
self._check_value(t, 'resharding3', 2, 'b', 0x9000000000000000,
|
|
should_be_here=False)
|
|
|
|
# check second value is in the right shard too
|
|
for t in shard_2_tablets:
|
|
self._check_value(t, 'resharding1', 3, 'msg3', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_value(t, 'resharding3', 3, 'c', 0xD000000000000000,
|
|
should_be_here=False)
|
|
for t in shard_3_tablets:
|
|
self._check_value(t, 'resharding1', 3, 'msg3', 0xD000000000000000)
|
|
self._check_value(t, 'resharding3', 3, 'c', 0xD000000000000000)
|
|
|
|
if base_sharding.use_rbr:
|
|
for t in shard_2_tablets:
|
|
self._check_value(t, 'no_pk', 1, 'msg1', 0xA000000000000000)
|
|
for t in shard_3_tablets:
|
|
self._check_value(t, 'no_pk', 1, 'msg1', 0xA000000000000000,
|
|
should_be_here=False)
|
|
|
|
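  # Note (added): _insert_lots writes rows 10000+i with keyspace ids
  # 0xA000...+i, which land in destination shard 80-c0, and rows 20000+i with
  # keyspace ids 0xE000...+i, which land in c0-. _check_lots and
  # _check_lots_not_present below rely on this split.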
def _insert_lots(self, count, base=0):
|
|
for i in xrange(count):
|
|
self._insert_value(shard_1_master, 'resharding1', 10000 + base + i,
|
|
'msg-range1-%d' % i, 0xA000000000000000 + base + i)
|
|
self._insert_value(shard_1_master, 'resharding1', 20000 + base + i,
|
|
'msg-range2-%d' % i, 0xE000000000000000 + base + i)
|
|
|
|
def _exec_multi_shard_dmls(self):
|
|
mids = [10000001, 10000002, 10000003]
|
|
msg_ids = ['msg-id10000001', 'msg-id10000002', 'msg-id10000003']
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000,
|
|
0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding1', mids,
|
|
msg_ids, keyspace_ids)
|
|
|
|
mids = [10000004, 10000005]
|
|
msg_ids = ['msg-id10000004', 'msg-id10000005']
|
|
keyspace_ids = [0xD000000000000000, 0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding1', mids,
|
|
msg_ids, keyspace_ids)
|
|
|
|
mids = [10000011, 10000012, 10000013]
|
|
msg_ids = ['msg-id10000011', 'msg-id10000012', 'msg-id10000013']
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000, 0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding1', mids,
|
|
msg_ids, keyspace_ids)
|
|
|
|
# This update targets two shards.
|
|
self._exec_non_annotated_update(shard_1_master, 'resharding1',
|
|
[10000011, 10000012], 'update1')
|
|
# This update targets one shard.
|
|
self._exec_non_annotated_update(shard_1_master, 'resharding1',
|
|
[10000013], 'update2')
|
|
|
|
mids = [10000014, 10000015, 10000016]
|
|
msg_ids = ['msg-id10000014', 'msg-id10000015', 'msg-id10000016']
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000, 0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding1', mids,
|
|
msg_ids, keyspace_ids)
|
|
|
|
# This delete targets two shards.
|
|
self._exec_non_annotated_delete(shard_1_master, 'resharding1',
|
|
[10000014, 10000015])
|
|
|
|
# This delete targets one shard.
|
|
self._exec_non_annotated_delete(shard_1_master, 'resharding1', [10000016])
|
|
|
|
# repeat DMLs for table with msg as bit(8)
|
|
mids = [10000001, 10000002, 10000003]
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000,
|
|
0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding3', mids,
|
|
['a','b','c'], keyspace_ids)
|
|
|
|
mids = [10000004, 10000005]
|
|
keyspace_ids = [0xD000000000000000, 0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding3', mids,
|
|
['d', 'e'], keyspace_ids)
|
|
mids = [10000011, 10000012, 10000013]
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000, 0xE000000000000000]
|
|
|
|
self._insert_multi_value(shard_1_master, 'resharding3', mids,
|
|
['k', 'l', 'm'], keyspace_ids)
|
|
|
|
# This update targets two shards.
|
|
self._exec_non_annotated_update(shard_1_master, 'resharding3',
|
|
[10000011, 10000012], 'g')
|
|
|
|
# This update targets one shard.
|
|
self._exec_non_annotated_update(shard_1_master, 'resharding3',
|
|
[10000013], 'h')
|
|
|
|
mids = [10000014, 10000015, 10000016]
|
|
keyspace_ids = [0x9000000000000000, 0xD000000000000000, 0xE000000000000000]
|
|
self._insert_multi_value(shard_1_master, 'resharding3', mids,
|
|
['n', 'o', 'p'], keyspace_ids)
|
|
|
|
# This delete targets two shards.
|
|
self._exec_non_annotated_delete(shard_1_master, 'resharding3',
|
|
[10000014, 10000015])
|
|
|
|
# This delete targets one shard.
|
|
self._exec_non_annotated_delete(shard_1_master, 'resharding3', [10000016])
|
|
|
|
def _check_multi_shard_values(self):
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000001, 'msg-id10000001', 0x9000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000002, 'msg-id10000002', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000003, 'msg-id10000003', 0xE000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000001, 'msg-id10000001', 0x9000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000002, 'msg-id10000002', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000003, 'msg-id10000003', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000004, 'msg-id10000004', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000005, 'msg-id10000005', 0xE000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000004, 'msg-id10000004', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000005, 'msg-id10000005', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding1', 10000011, 'update1', 0x9000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000012, 'update1', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding1', 10000013, 'update2', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding1', 10000014, 'msg-id10000014', 0x9000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding1', 10000015, 'msg-id10000015', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding1', 10000016, 'msg-id10000016', 0xF000000000000000,
|
|
should_be_here=False)
|
|
|
|
# checks for bit(8) table
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000001, 'a', 0x9000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000002, 'b', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000003, 'c', 0xE000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000001, 'a', 0x9000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000002, 'b', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000003, 'c', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000004, 'd', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000005, 'e', 0xE000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000004, 'd', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000005, 'e', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2],
|
|
'resharding3', 10000011, 'g', 0x9000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000012, 'g', 0xD000000000000000)
|
|
self._check_multi_dbs(
|
|
[shard_3_master, shard_3_replica],
|
|
'resharding3', 10000013, 'h', 0xE000000000000000)
|
|
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding3', 10000014, 'n', 0x9000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding3', 10000015, 'o', 0xD000000000000000,
|
|
should_be_here=False)
|
|
self._check_multi_dbs(
|
|
[shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_3_master, shard_3_replica],
|
|
'resharding3', 10000016, 'p', 0xF000000000000000,
|
|
should_be_here=False)
|
|
|
|
# _check_multi_dbs checks the row in multiple dbs.
|
|
def _check_multi_dbs(self, dblist, table, mid, msg, keyspace_id,
|
|
should_be_here=True):
|
|
for db in dblist:
|
|
self._check_value(db, table, mid, msg, keyspace_id, should_be_here)
|
|
|
|
  # _check_lots returns what percentage of the expected values are present.
def _check_lots(self, count, base=0):
|
|
found = 0
|
|
for i in xrange(count):
|
|
if self._is_value_present_and_correct(shard_2_replica2, 'resharding1',
|
|
10000 + base + i, 'msg-range1-%d' %
|
|
i, 0xA000000000000000 + base + i):
|
|
found += 1
|
|
if self._is_value_present_and_correct(shard_3_replica, 'resharding1',
|
|
20000 + base + i, 'msg-range2-%d' %
|
|
i, 0xE000000000000000 + base + i):
|
|
found += 1
|
|
percent = found * 100 / count / 2
|
|
logging.debug('I have %d%% of the data', percent)
|
|
return percent
|
|
|
|
def _check_lots_timeout(self, count, threshold, timeout, base=0):
|
|
while True:
|
|
value = self._check_lots(count, base=base)
|
|
if value >= threshold:
|
|
return value
|
|
timeout = utils.wait_step('waiting for %d%% of the data' % threshold,
|
|
timeout, sleep_time=1)
|
|
|
|
# _check_lots_not_present makes sure no data is in the wrong shard
|
|
def _check_lots_not_present(self, count, base=0):
|
|
for i in xrange(count):
|
|
self._check_value(shard_3_replica, 'resharding1', 10000 + base + i,
|
|
'msg-range1-%d' % i, 0xA000000000000000 + base + i,
|
|
should_be_here=False)
|
|
self._check_value(shard_2_replica2, 'resharding1', 20000 + base + i,
|
|
'msg-range2-%d' % i, 0xE000000000000000 + base + i,
|
|
should_be_here=False)
|
|
|
|
def test_resharding(self):
|
|
# we're going to reparent and swap these two
|
|
global shard_2_master, shard_2_replica1
|
|
|
|
utils.run_vtctl(['CreateKeyspace',
|
|
'--sharding_column_name', 'bad_column',
|
|
'--sharding_column_type', 'bytes',
|
|
'test_keyspace'])
|
|
utils.run_vtctl(['SetKeyspaceShardingInfo', 'test_keyspace',
|
|
'custom_ksid_col', 'uint64'], expect_fail=True)
|
|
utils.run_vtctl(['SetKeyspaceShardingInfo', '-force',
|
|
'test_keyspace',
|
|
'custom_ksid_col', base_sharding.keyspace_id_type])
|
|
|
|
shard_0_master.init_tablet('replica', 'test_keyspace', '-80')
|
|
shard_0_replica.init_tablet('replica', 'test_keyspace', '-80')
|
|
shard_0_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '-80')
|
|
shard_1_master.init_tablet('replica', 'test_keyspace', '80-')
|
|
shard_1_slave1.init_tablet('replica', 'test_keyspace', '80-')
|
|
shard_1_slave2.init_tablet('replica', 'test_keyspace', '80-')
|
|
shard_1_ny_rdonly.init_tablet('rdonly', 'test_keyspace', '80-')
|
|
shard_1_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-')
|
|
|
|
utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
|
|
ks = utils.run_vtctl_json(['GetSrvKeyspace', 'test_nj', 'test_keyspace'])
|
|
self.assertEqual(ks['sharding_column_name'], 'custom_ksid_col')
|
|
|
|
# we set full_mycnf_args to True as a test in the KIT_BYTES case
|
|
full_mycnf_args = (base_sharding.keyspace_id_type ==
|
|
keyrange_constants.KIT_BYTES)
|
|
|
|
# create databases so vttablet can start behaving somewhat normally
|
|
for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
|
|
shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
|
|
shard_1_rdonly1]:
|
|
t.create_db('vt_test_keyspace')
|
|
t.start_vttablet(wait_for_state=None, full_mycnf_args=full_mycnf_args,
|
|
binlog_use_v3_resharding_mode=False)
|
|
|
|
# wait for the tablets (replication is not setup, they won't be healthy)
|
|
for t in [shard_0_master, shard_0_replica, shard_0_ny_rdonly,
|
|
shard_1_master, shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
|
|
shard_1_rdonly1]:
|
|
t.wait_for_vttablet_state('NOT_SERVING')
|
|
|
|
# reparent to make the tablets work
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/-80',
|
|
shard_0_master.tablet_alias], auto_log=True)
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-',
|
|
shard_1_master.tablet_alias], auto_log=True)
|
|
|
|
# check the shards
|
|
shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
|
|
self.assertIn('-80', shards, 'unexpected shards: %s' % str(shards))
|
|
self.assertIn('80-', shards, 'unexpected shards: %s' % str(shards))
|
|
self.assertEqual(len(shards), 2, 'unexpected shards: %s' % str(shards))
|
|
|
|
# create the tables
|
|
self._create_schema()
|
|
self._insert_startup_values()
|
|
|
|
# run a health check on source replicas so they respond to discovery
|
|
# (for binlog players) and on the source rdonlys (for workers)
|
|
for t in [shard_0_replica, shard_1_slave1]:
|
|
utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
|
|
for t in [shard_0_ny_rdonly, shard_1_ny_rdonly, shard_1_rdonly1]:
|
|
utils.run_vtctl(['RunHealthCheck', t.tablet_alias])
|
|
|
|
# create the split shards
|
|
shard_2_master.init_tablet('replica', 'test_keyspace', '80-c0')
|
|
shard_2_replica1.init_tablet('replica', 'test_keyspace', '80-c0')
|
|
shard_2_replica2.init_tablet('replica', 'test_keyspace', '80-c0')
|
|
shard_2_rdonly1.init_tablet('rdonly', 'test_keyspace', '80-c0')
|
|
shard_3_master.init_tablet('replica', 'test_keyspace', 'c0-')
|
|
shard_3_replica.init_tablet('replica', 'test_keyspace', 'c0-')
|
|
shard_3_rdonly1.init_tablet('rdonly', 'test_keyspace', 'c0-')
|
|
|
|
# start vttablet on the split shards (no db created,
|
|
# so they're all not serving)
|
|
shard_2_master.start_vttablet(wait_for_state=None,
|
|
binlog_use_v3_resharding_mode=False)
|
|
shard_3_master.start_vttablet(wait_for_state=None,
|
|
binlog_use_v3_resharding_mode=False)
|
|
for t in [shard_2_replica1, shard_2_replica2, shard_2_rdonly1,
|
|
shard_3_replica, shard_3_rdonly1]:
|
|
t.start_vttablet(wait_for_state=None,
|
|
binlog_use_v3_resharding_mode=False)
|
|
for t in [shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_2_rdonly1,
|
|
shard_3_master, shard_3_replica, shard_3_rdonly1]:
|
|
t.wait_for_vttablet_state('NOT_SERVING')
|
|
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/80-c0',
|
|
shard_2_master.tablet_alias], auto_log=True)
|
|
utils.run_vtctl(['InitShardMaster', '-force', 'test_keyspace/c0-',
|
|
shard_3_master.tablet_alias], auto_log=True)
|
|
|
|
# check the shards
|
|
shards = utils.run_vtctl_json(['FindAllShardsInKeyspace', 'test_keyspace'])
|
|
for s in ['-80', '80-', '80-c0', 'c0-']:
|
|
self.assertIn(s, shards, 'unexpected shards: %s' % str(shards))
|
|
self.assertEqual(len(shards), 4, 'unexpected shards: %s' % str(shards))
|
|
|
|
utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
|
|
auto_log=True)
|
|
utils.check_srv_keyspace(
|
|
'test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
|
|
# disable shard_1_slave2, so we're sure filtered replication will go
|
|
# from shard_1_slave1
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'spare'])
|
|
shard_1_slave2.wait_for_vttablet_state('NOT_SERVING')
|
|
|
|
# we need to create the schema, and the worker will do data copying
|
|
for keyspace_shard in ('test_keyspace/80-c0', 'test_keyspace/c0-'):
|
|
utils.run_vtctl(['CopySchemaShard', '--exclude_tables', 'unrelated',
|
|
shard_1_rdonly1.tablet_alias, keyspace_shard],
|
|
auto_log=True)
|
|
|
|
# Run vtworker as daemon for the following SplitClone commands.
|
|
worker_proc, worker_port, worker_rpc_port = utils.run_vtworker_bg(
|
|
['--cell', 'test_nj', '--command_display_interval', '10ms',
|
|
'--use_v3_resharding_mode=false'],
|
|
auto_log=True)
|
|
|
|
# Copy the data from the source to the destination shards.
|
|
# --max_tps is only specified to enable the throttler and ensure that the
|
|
# code is executed. But the intent here is not to throttle the test, hence
|
|
# the rate limit is set very high.
|
|
#
|
|
# Initial clone (online).
|
|
workerclient_proc = utils.run_vtworker_client_bg(
|
|
['SplitClone',
|
|
'--offline=false',
|
|
'--exclude_tables', 'unrelated',
|
|
'--chunk_count', '10',
|
|
'--min_rows_per_chunk', '1',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'--max_tps', '9999',
|
|
'test_keyspace/80-'],
|
|
worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
|
|
2, 0, 0, 0)
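    # (Added note: the four counters passed to verify_reconciliation_counters
    # read as inserted/updated/deleted/equal row counts here; the later calls
    # in this test are consistent with that interpretation.)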
|
|
|
|
# Reset vtworker such that we can run the next command.
|
|
workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
|
|
# Test the correct handling of keyspace_id changes which happen after
|
|
# the first clone.
|
|
# Let row 2 go to shard 3 instead of shard 2.
|
|
shard_1_master.mquery('vt_test_keyspace',
|
|
'update resharding1 set'
|
|
' custom_ksid_col=0xD000000000000000 WHERE id=2',
|
|
write=True)
|
|
workerclient_proc = utils.run_vtworker_client_bg(
|
|
['SplitClone',
|
|
'--offline=false',
|
|
'--exclude_tables', 'unrelated',
|
|
'--chunk_count', '10',
|
|
'--min_rows_per_chunk', '1',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'--max_tps', '9999',
|
|
'test_keyspace/80-'],
|
|
worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
# Row 2 will be deleted from shard 2 and inserted to shard 3.
|
|
self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
|
|
1, 0, 1, 1)
|
|
self._check_value(shard_2_master, 'resharding1', 2, 'msg2',
|
|
0xD000000000000000, should_be_here=False)
|
|
self._check_value(shard_3_master, 'resharding1', 2, 'msg2',
|
|
0xD000000000000000)
|
|
# Reset vtworker such that we can run the next command.
|
|
workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
|
|
# Move row 2 back to shard 2 from shard 3 by changing the keyspace_id again.
|
|
shard_1_master.mquery('vt_test_keyspace',
|
|
'update resharding1 set'
|
|
' custom_ksid_col=0x9000000000000000 WHERE id=2',
|
|
write=True)
|
|
workerclient_proc = utils.run_vtworker_client_bg(
|
|
['SplitClone',
|
|
'--offline=false',
|
|
'--exclude_tables', 'unrelated',
|
|
'--chunk_count', '10',
|
|
'--min_rows_per_chunk', '1',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'--max_tps', '9999',
|
|
'test_keyspace/80-'],
|
|
worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
# Row 2 will be deleted from shard 3 and inserted to shard 2.
|
|
self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
|
|
1, 0, 1, 1)
|
|
self._check_value(shard_2_master, 'resharding1', 2, 'msg2',
|
|
0x9000000000000000)
|
|
self._check_value(shard_3_master, 'resharding1', 2, 'msg2',
|
|
0x9000000000000000, should_be_here=False)
|
|
# Reset vtworker such that we can run the next command.
|
|
workerclient_proc = utils.run_vtworker_client_bg(['Reset'], worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
|
|
# Modify the destination shard. SplitClone will revert the changes.
|
|
# Delete row 2 (provokes an insert).
|
|
shard_2_master.mquery('vt_test_keyspace',
|
|
'delete from resharding1 where id=2', write=True)
|
|
# Update row 3 (provokes an update).
|
|
shard_3_master.mquery('vt_test_keyspace',
|
|
"update resharding1 set msg='msg-not-3' where id=3",
|
|
write=True)
|
|
# Insert row 4 and 5 (provokes a delete).
|
|
self._insert_value(shard_3_master, 'resharding1', 4, 'msg4',
|
|
0xD000000000000000)
|
|
self._insert_value(shard_3_master, 'resharding1', 5, 'msg5',
|
|
0xD000000000000000)
|
|
|
|
workerclient_proc = utils.run_vtworker_client_bg(
|
|
['SplitClone',
|
|
'--exclude_tables', 'unrelated',
|
|
'--chunk_count', '10',
|
|
'--min_rows_per_chunk', '1',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'--max_tps', '9999',
|
|
'test_keyspace/80-'],
|
|
worker_rpc_port)
|
|
utils.wait_procs([workerclient_proc])
|
|
# Change tablet, which was taken offline, back to rdonly.
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias,
|
|
'rdonly'], auto_log=True)
|
|
self.verify_reconciliation_counters(worker_port, 'Online', 'resharding1',
|
|
1, 1, 2, 0)
|
|
self.verify_reconciliation_counters(worker_port, 'Offline', 'resharding1',
|
|
0, 0, 0, 2)
|
|
# Terminate worker daemon because it is no longer needed.
|
|
utils.kill_sub_process(worker_proc, soft=True)
|
|
|
|
# check the startup values are in the right place
|
|
self._check_startup_values()
|
|
|
|
# check the schema too
|
|
utils.run_vtctl(['ValidateSchemaKeyspace', '--exclude_tables=unrelated',
|
|
'test_keyspace'], auto_log=True)
|
|
|
|
# Verify vreplication table entries
|
|
result = shard_2_master.mquery('_vt', 'select * from vreplication')
|
|
self.assertEqual(len(result), 1)
|
|
self.assertEqual(result[0][1], 'SplitClone')
|
|
self.assertEqual(result[0][2],
|
|
'keyspace:"test_keyspace" shard:"80-" '
|
|
'key_range:<start:"\\200" end:"\\300" > ')
|
|
|
|
result = shard_3_master.mquery('_vt', 'select * from vreplication')
|
|
self.assertEqual(len(result), 1)
|
|
self.assertEqual(result[0][1], 'SplitClone')
|
|
self.assertEqual(result[0][2],
|
|
'keyspace:"test_keyspace" shard:"80-" key_range:<start:"\\300" > ')
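    # (Added note: the key_range bounds above are the raw keyspace_id prefix
    # bytes rendered as octal escapes: 0x80 is '\200' and 0xc0 is '\300', so
    # shard 80-c0 covers [\200, \300) and c0- is open-ended from \300.)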
|
|
|
|
# check the binlog players are running and exporting vars
|
|
self.check_destination_master(shard_2_master, ['test_keyspace/80-'])
|
|
self.check_destination_master(shard_3_master, ['test_keyspace/80-'])
|
|
# When the binlog players/filtered replication is turned on, the query
|
|
# service must be turned off on the destination masters.
|
|
# The tested behavior is a safeguard to prevent that somebody can
|
|
# accidentally modify data on the destination masters while they are not
|
|
# migrated yet and the source shards are still the source of truth.
|
|
shard_2_master.wait_for_vttablet_state('NOT_SERVING')
|
|
shard_3_master.wait_for_vttablet_state('NOT_SERVING')
|
|
|
|
# check that binlog server exported the stats vars
|
|
self.check_binlog_server_vars(shard_1_slave1, horizontal=True)
|
|
|
|
# Check that the throttler was enabled.
|
|
# The stream id is hard-coded as 1, which is the first id generated
|
|
# through auto-inc.
|
|
self.check_throttler_service(shard_2_master.rpc_endpoint(),
|
|
['BinlogPlayer/1'], 9999)
|
|
self.check_throttler_service(shard_3_master.rpc_endpoint(),
|
|
['BinlogPlayer/1'], 9999)
|
|
|
|
# testing filtered replication: insert a bunch of data on shard 1,
|
|
# check we get most of it after a few seconds, wait for binlog server
|
|
# timeout, check we get all of it.
|
|
logging.debug('Inserting lots of data on source shard')
|
|
self._insert_lots(1000)
|
|
logging.debug('Executing MultiValue Insert Queries')
|
|
self._exec_multi_shard_dmls()
|
|
logging.debug('Checking 80 percent of data is sent quickly')
|
|
v = self._check_lots_timeout(1000, 80, 5)
|
|
if v != 100:
|
|
# small optimization: only do this check if we don't have all the data
|
|
# already anyway.
|
|
logging.debug('Checking all data goes through eventually')
|
|
self._check_lots_timeout(1000, 100, 20)
|
|
logging.debug('Checking no data was sent the wrong way')
|
|
self._check_lots_not_present(1000)
|
|
|
|
logging.debug('Checking MultiValue Insert Queries')
|
|
self._check_multi_shard_values()
|
|
self.check_binlog_player_vars(shard_2_master, ['test_keyspace/80-'],
|
|
seconds_behind_master_max=30)
|
|
self.check_binlog_player_vars(shard_3_master, ['test_keyspace/80-'],
|
|
seconds_behind_master_max=30)
|
|
self.check_binlog_server_vars(shard_1_slave1, horizontal=True,
|
|
min_statements=1000, min_transactions=1000)
|
|
|
|
# use vtworker to compare the data (after health-checking the destination
|
|
# rdonly tablets so discovery works)
|
|
utils.run_vtctl(['RunHealthCheck', shard_3_rdonly1.tablet_alias])
|
|
|
|
if base_sharding.use_multi_split_diff:
|
|
logging.debug('Running vtworker MultiSplitDiff')
|
|
utils.run_vtworker(['-cell', 'test_nj',
|
|
'--use_v3_resharding_mode=false',
|
|
'MultiSplitDiff',
|
|
'--exclude_tables', 'unrelated',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'test_keyspace/80-'],
|
|
auto_log=True)
|
|
else:
|
|
logging.debug('Running vtworker SplitDiff')
|
|
utils.run_vtworker(['-cell', 'test_nj',
|
|
'--use_v3_resharding_mode=false',
|
|
'SplitDiff',
|
|
'--exclude_tables', 'unrelated',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'test_keyspace/c0-'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
|
|
auto_log=True)
|
|
|
|
utils.pause('Good time to test vtworker for diffs')
|
|
|
|
# get status for destination master tablets, make sure we have it all
|
|
if base_sharding.use_rbr:
|
|
      # We submitted non-annotated DMLs, which are properly routed
      # with RBR, but not with SBR. So the first shard counts
      # are smaller. In the second shard, we submitted statements
      # that affect more than one keyspace id. These will result
      # in two queries with RBR. So the count there is higher.
|
|
self.check_running_binlog_player(shard_2_master, 4036, 2016)
|
|
self.check_running_binlog_player(shard_3_master, 4056, 2016)
|
|
else:
|
|
self.check_running_binlog_player(shard_2_master, 4044, 2016)
|
|
self.check_running_binlog_player(shard_3_master, 4048, 2016)
|
|
|
|
# start a thread to insert data into shard_1 in the background
|
|
# with current time, and monitor the delay
|
|
insert_thread_1 = InsertThread(shard_1_master, 'insert_low', 1, 10000,
|
|
0x9000000000000000)
|
|
insert_thread_2 = InsertThread(shard_1_master, 'insert_high', 2, 10001,
|
|
0xD000000000000000)
|
|
monitor_thread_1 = MonitorLagThread(shard_2_replica2, 'insert_low', 1)
|
|
monitor_thread_2 = MonitorLagThread(shard_3_replica, 'insert_high', 2)
|
|
|
|
# tests a failover switching serving to a different replica
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_slave2.tablet_alias, 'replica'])
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_slave1.tablet_alias, 'spare'])
|
|
shard_1_slave2.wait_for_vttablet_state('SERVING')
|
|
shard_1_slave1.wait_for_vttablet_state('NOT_SERVING')
|
|
utils.run_vtctl(['RunHealthCheck', shard_1_slave2.tablet_alias])
|
|
|
|
# test data goes through again
|
|
logging.debug('Inserting lots of data on source shard')
|
|
self._insert_lots(1000, base=1000)
|
|
logging.debug('Checking 80 percent of data was sent quickly')
|
|
self._check_lots_timeout(1000, 80, 5, base=1000)
|
|
self.check_binlog_server_vars(shard_1_slave2, horizontal=True,
|
|
min_statements=800, min_transactions=800)
|
|
|
|
# check we can't migrate the master just yet
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
|
|
expect_fail=True)
|
|
|
|
# check query service is off on master 2 and master 3, as filtered
|
|
# replication is enabled. Even health check that is enabled on
|
|
# master 3 should not interfere (we run it to be sure).
|
|
utils.run_vtctl(['RunHealthCheck', shard_3_master.tablet_alias],
|
|
auto_log=True)
|
|
for master in [shard_2_master, shard_3_master]:
|
|
utils.check_tablet_query_service(self, master, False, False)
|
|
stream_health = utils.run_vtctl_json(['VtTabletStreamHealth',
|
|
'-count', '1',
|
|
master.tablet_alias])
|
|
logging.debug('Got health: %s', str(stream_health))
|
|
self.assertIn('realtime_stats', stream_health)
|
|
self.assertNotIn('serving', stream_health)
|
|
|
|
# check the destination master 3 is healthy, even though its query
|
|
# service is not running (if not healthy this would exception out)
|
|
shard_3_master.get_healthz()
|
|
|
|
# now serve rdonly from the split shards, in test_nj only
|
|
utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
|
|
'test_keyspace/80-', 'rdonly'], auto_log=True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_srv_keyspace('test_ny', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
|
|
utils.check_tablet_query_service(self, shard_1_ny_rdonly, True, False)
|
|
utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)
|
|
|
|
    # Shouldn't be able to rebuild the keyspace graph while the migration is
    # ongoing (i.e. there are records that have tablet controls set).
    utils.run_vtctl(['RebuildKeyspaceGraph', 'test_keyspace'],
                    auto_log=True, expect_fail=True)
|
|
|
|
# rerun migrate to ensure it doesn't fail
|
|
# skip refresh to make it go faster
|
|
utils.run_vtctl(['MigrateServedTypes', '--cells=test_nj',
|
|
'-skip-refresh-state=true',
|
|
'test_keyspace/80-', 'rdonly'], auto_log=True)
|
|
|
|
# now serve rdonly from the split shards, everywhere
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'rdonly'],
|
|
auto_log=True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_srv_keyspace('test_ny', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_tablet_query_service(self, shard_0_ny_rdonly, True, False)
|
|
utils.check_tablet_query_service(self, shard_1_ny_rdonly, False, True)
|
|
utils.check_tablet_query_service(self, shard_1_rdonly1, False, True)
|
|
|
|
# rerun migrate to ensure it doesn't fail
|
|
# skip refresh to make it go faster
|
|
utils.run_vtctl(['MigrateServedTypes', '-skip-refresh-state=true',
|
|
'test_keyspace/80-', 'rdonly'], auto_log=True)
|
|
|
|
# then serve replica from the split shards
|
|
destination_shards = ['80-c0', 'c0-']
|
|
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
|
|
auto_log=True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-c0 c0-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_tablet_query_service(self, shard_1_slave2, False, True)
|
|
|
|
# move replica back and forth
|
|
utils.run_vtctl(
|
|
['MigrateServedTypes', '-reverse', 'test_keyspace/80-', 'replica'],
|
|
auto_log=True)
|
|
# After a backwards migration, queryservice should be enabled on
|
|
# source and disabled on destinations
|
|
utils.check_tablet_query_service(self, shard_1_slave2, True, False)
|
|
# Destination tablets would have query service disabled for other
|
|
# reasons than the migration, so check the shard record instead of
|
|
# the tablets directly.
|
|
utils.check_shard_query_services(self, 'test_nj', 'test_keyspace', destination_shards,
|
|
topodata_pb2.REPLICA, False)
|
|
utils.check_shard_query_services(self, 'test_ny', 'test_keyspace', destination_shards,
|
|
topodata_pb2.REPLICA, False)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'replica'],
|
|
auto_log=True)
|
|
# After a forwards migration, queryservice should be disabled on
|
|
# source and enabled on destinations
|
|
utils.check_tablet_query_service(self, shard_1_slave2, False, True)
|
|
# Destination tablets would have query service disabled for other
|
|
# reasons than the migration, so check the shard record instead of
|
|
# the tablets directly
|
|
utils.check_shard_query_services(self, 'test_nj', 'test_keyspace', destination_shards,
|
|
topodata_pb2.REPLICA, True)
|
|
utils.check_shard_query_services(self, 'test_ny', 'test_keyspace', destination_shards,
|
|
topodata_pb2.REPLICA, True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-c0 c0-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
|
|
# reparent shard_2 to shard_2_replica1, then insert more data and
|
|
# see it flow through still
|
|
utils.run_vtctl(['PlannedReparentShard',
|
|
'-keyspace_shard', 'test_keyspace/80-c0',
|
|
'-new_master', shard_2_replica1.tablet_alias])
|
|
|
|
# update our test variables to point at the new master
|
|
shard_2_master, shard_2_replica1 = shard_2_replica1, shard_2_master
|
|
|
|
utils.pause('check state of _vt.vreplication')
|
|
|
|
logging.debug('Inserting lots of data on source shard after reparenting')
|
|
self._insert_lots(3000, base=2000)
|
|
logging.debug('Checking 80 percent of data was sent fairly quickly')
|
|
self._check_lots_timeout(3000, 80, 10, base=2000)
|
|
|
|
# use vtworker to compare the data again
|
|
if base_sharding.use_multi_split_diff:
|
|
logging.debug('Running vtworker MultiSplitDiff')
|
|
utils.run_vtworker(['-cell', 'test_nj',
|
|
'--use_v3_resharding_mode=false',
|
|
'MultiSplitDiff',
|
|
'--exclude_tables', 'unrelated',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'test_keyspace/80-'],
|
|
auto_log=True)
|
|
else:
|
|
logging.debug('Running vtworker SplitDiff')
|
|
utils.run_vtworker(['-cell', 'test_nj',
|
|
'--use_v3_resharding_mode=false',
|
|
'SplitDiff',
|
|
'--exclude_tables', 'unrelated',
|
|
'--min_healthy_rdonly_tablets', '1',
|
|
'test_keyspace/c0-'],
|
|
auto_log=True)
|
|
|
|
utils.run_vtctl(['ChangeSlaveType', shard_1_rdonly1.tablet_alias, 'rdonly'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['ChangeSlaveType', shard_3_rdonly1.tablet_alias, 'rdonly'],
|
|
auto_log=True)
|
|
|
|
# going to migrate the master now, check the delays
|
|
monitor_thread_1.done = True
|
|
monitor_thread_2.done = True
|
|
insert_thread_1.done = True
|
|
insert_thread_2.done = True
|
|
logging.debug('DELAY 1: %s max_lag=%d ms avg_lag=%d ms',
|
|
monitor_thread_1.thread_name,
|
|
monitor_thread_1.max_lag_ms,
|
|
monitor_thread_1.lag_sum_ms / monitor_thread_1.sample_count)
|
|
logging.debug('DELAY 2: %s max_lag=%d ms avg_lag=%d ms',
|
|
monitor_thread_2.thread_name,
|
|
monitor_thread_2.max_lag_ms,
|
|
monitor_thread_2.lag_sum_ms / monitor_thread_2.sample_count)
|
|
|
|
# mock with the SourceShard records to test 'vtctl SourceShardDelete'
|
|
# and 'vtctl SourceShardAdd'
|
|
utils.run_vtctl(['SourceShardDelete', 'test_keyspace/c0-', '1'],
|
|
auto_log=True)
|
|
utils.run_vtctl(['SourceShardAdd', '--key_range=80-',
|
|
'test_keyspace/c0-', '1', 'test_keyspace/80-'],
|
|
auto_log=True)
|
|
|
|
# CancelResharding should fail because migration has started.
|
|
utils.run_vtctl(['CancelResharding', 'test_keyspace/80-'],
|
|
auto_log=True, expect_fail=True)
|
|
|
|
# do a Migrate that will fail waiting for replication
|
|
# which should cause the Migrate to be canceled and the source
|
|
# master to be serving again.
|
|
utils.run_vtctl(['MigrateServedTypes',
|
|
'-filtered_replication_wait_time', '0s',
|
|
'test_keyspace/80-', 'master'],
|
|
auto_log=True, expect_fail=True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-c0 c0-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_tablet_query_service(self, shard_1_master, True, False)
|
|
|
|
# sabotage master migration and make it fail in an unfinished state.
|
|
utils.run_vtctl(['SetShardTabletControl', '-blacklisted_tables=t',
|
|
'test_keyspace/c0-', 'master'], auto_log=True)
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
|
|
auto_log=True, expect_fail=True)
|
|
|
|
    # Query service is disabled on the source shard because the failure
    # occurred after the point of no return.
|
|
utils.check_tablet_query_service(self, shard_1_master, False, True)
|
|
|
|
# Global topology records should not change as migration did not succeed
|
|
shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
|
|
    self.assertEqual(shard['is_master_serving'], True,
                     'the source shard master should still be serving')
|
|
|
|
shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/c0-'])
|
|
self.assertEqual(len(shard['source_shards']), 1, 'source shards should be set in destination shard')
|
|
    self.assertEqual(shard['is_master_serving'], False,
                     'the destination shard master should not be serving yet')
|
|
|
|
shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-c0'])
|
|
self.assertEqual(len(shard['source_shards']), 1, 'source shards should be set in destination shard')
|
|
    self.assertEqual(shard['is_master_serving'], False,
                     'the destination shard master should not be serving yet')
|
|
|
|
# remove sabotage, but make it fail early. This should not result
|
|
# in the source master serving, because this failure is past the
|
|
# point of no return.
|
|
utils.run_vtctl(['SetShardTabletControl', '-blacklisted_tables=t',
|
|
'-remove', 'test_keyspace/c0-', 'master'], auto_log=True)
|
|
utils.run_vtctl(['MigrateServedTypes',
|
|
'-filtered_replication_wait_time', '0s',
|
|
'test_keyspace/80-', 'master'],
|
|
auto_log=True, expect_fail=True)
|
|
|
|
utils.check_tablet_query_service(self, shard_1_master, False, True)
|
|
|
|
# do the migration that's expected to succeed
|
|
utils.run_vtctl(['MigrateServedTypes', 'test_keyspace/80-', 'master'],
|
|
auto_log=True)
|
|
utils.check_srv_keyspace('test_nj', 'test_keyspace',
|
|
'Partitions(master): -80 80-c0 c0-\n'
|
|
'Partitions(rdonly): -80 80-c0 c0-\n'
|
|
'Partitions(replica): -80 80-c0 c0-\n',
|
|
keyspace_id_type=base_sharding.keyspace_id_type,
|
|
sharding_column_name='custom_ksid_col')
|
|
utils.check_tablet_query_service(self, shard_1_master, False, True)
|
|
|
|
# check destination shards are serving
|
|
|
|
utils.check_tablet_query_service(self, shard_2_master, True, False)
|
|
utils.check_tablet_query_service(self, shard_3_master, True, False)
|
|
|
|
# check the binlog players are gone now
|
|
self.check_no_binlog_player(shard_2_master)
|
|
self.check_no_binlog_player(shard_3_master)
|
|
|
|
# test reverse_replication
|
|
# start with inserting a row in each destination shard
|
|
self._insert_value(shard_2_master, 'resharding2', 2, 'msg2',
|
|
0x9000000000000000)
|
|
self._insert_value(shard_3_master, 'resharding2', 3, 'msg3',
|
|
0xD000000000000000)
|
|
# ensure the rows are not present yet
|
|
self._check_value(shard_1_master, 'resharding2', 2, 'msg2',
|
|
0x9000000000000000, should_be_here=False)
|
|
self._check_value(shard_1_master, 'resharding2', 3, 'msg3',
|
|
0xD000000000000000, should_be_here=False)
|
|
# repeat the migration with reverse_replication
|
|
utils.run_vtctl(['MigrateServedTypes', '-reverse_replication=true',
|
|
'test_keyspace/80-', 'master'], auto_log=True)
|
|
# look for the rows in the original master after a short wait
|
|
time.sleep(1.0)
|
|
self._check_value(shard_1_master, 'resharding2', 2, 'msg2',
|
|
0x9000000000000000)
|
|
self._check_value(shard_1_master, 'resharding2', 3, 'msg3',
|
|
0xD000000000000000)
|
|
|
|
# retry the migration to ensure it now fails
|
|
utils.run_vtctl(['MigrateServedTypes', '-reverse_replication=true',
|
|
'test_keyspace/80-', 'master'],
|
|
auto_log=True, expect_fail=True)
|
|
|
|
# CancelResharding should now succeed
|
|
utils.run_vtctl(['CancelResharding', 'test_keyspace/80-'], auto_log=True)
|
|
self.check_no_binlog_player(shard_1_master)
|
|
|
|
# delete the original tablets in the original shard
|
|
tablet.kill_tablets([shard_1_master, shard_1_slave1, shard_1_slave2,
|
|
shard_1_ny_rdonly, shard_1_rdonly1])
|
|
for t in [shard_1_slave1, shard_1_slave2, shard_1_ny_rdonly,
|
|
shard_1_rdonly1]:
|
|
utils.run_vtctl(['DeleteTablet', t.tablet_alias], auto_log=True)
|
|
utils.run_vtctl(['DeleteTablet', '-allow_master',
|
|
shard_1_master.tablet_alias], auto_log=True)
|
|
|
|
# rebuild the serving graph, all mentions of the old shards should be gone
|
|
utils.run_vtctl(
|
|
['RebuildKeyspaceGraph', 'test_keyspace'], auto_log=True)
|
|
|
|
# test RemoveShardCell
|
|
utils.run_vtctl(
|
|
['RemoveShardCell', 'test_keyspace/-80', 'test_nj'], auto_log=True,
|
|
expect_fail=True)
|
|
utils.run_vtctl(
|
|
['RemoveShardCell', 'test_keyspace/80-', 'test_nj'], auto_log=True)
|
|
utils.run_vtctl(
|
|
['RemoveShardCell', 'test_keyspace/80-', 'test_ny'], auto_log=True)
|
|
shard = utils.run_vtctl_json(['GetShard', 'test_keyspace/80-'])
|
|
self.assertTrue('cells' not in shard or not shard['cells'])
|
|
|
|
# delete the original shard
|
|
utils.run_vtctl(['DeleteShard', 'test_keyspace/80-'], auto_log=True)
|
|
|
|
# make sure we can't delete the destination shard now that it's serving
|
|
_, stderr = utils.run_vtctl(['DeleteShard', 'test_keyspace/80-c0'],
|
|
expect_fail=True)
|
|
self.assertIn('is still serving, cannot delete it', stderr)
|
|
|
|
# kill everything
|
|
tablet.kill_tablets([shard_0_master, shard_0_replica, shard_0_ny_rdonly,
|
|
shard_2_master, shard_2_replica1, shard_2_replica2,
|
|
shard_2_rdonly1,
|
|
shard_3_master, shard_3_replica, shard_3_rdonly1])


if __name__ == '__main__':
  utils.main()