From 5fe3d63440596a195c0bb21a33ac54c5673db259 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Thu, 10 Nov 2022 19:14:21 +0100 Subject: [PATCH] [release-15.0] [bugfix] Allow VTExplain to handle shards that are not active during resharding (#11640) (#11652) * VTexplain topology only uses serving shards This addresses isse #11632 , which causes vtexplain to sometimes give bad results if the keyspace is being resharded, because sometimes it picks source shards and other times target shards, for routing the query. The issue is that the `VTExplain.buildTopolog()` adds both source and destination shards to the map that holds shards per keyspace, when only one of them is actually serving traffic at any point in time. Later on, vtexplain loops over this map. Because looping over the map gives a non-deterministic order, sometimes the results are correct, and sometimes incorrect - that is, sometimes it gives the result of the shard that is serving, and other times, the shard that is not serving. This change ensures that only the serving shards are added to the shards per keyspace map, thus avoiding the incorrect vtexplain. Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> * This addresses issue #11632 , which causes vtexplain to sometimes give bad results if the keyspace is being resharded, because sometimes it picks source shards and other times target shards, for routing the query. The issue is that the VTExplain.buildTopolog() adds both source and destination shards to the map that holds shards per keyspace, when only one of them is actually serving traffic at any point in time. Later on, vtexplain loops over this map. Because looping over the map gives a non-deterministic order, sometimes the results are correct, and sometimes incorrect - that is, sometimes it gives the result of the shard that is serving, and other times, the shard that is not serving. This change ensures that only the serving shards are added to the shards per keyspace map, thus avoiding the incorrect vtexplain. Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> * Fix check_make_vtadmin_authz_testgen Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> * empty commit to trigger CI Signed-off-by: Andres Taylor Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> Signed-off-by: Andres Taylor Co-authored-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com> Co-authored-by: Andres Taylor --- go/vt/vtadmin/api_authz_test.go | 3 +- .../vtadmin/testutil/authztestgen/config.json | 2 +- go/vt/vtexplain/vtexplain_test.go | 37 +++++++++++-------- go/vt/vtexplain/vtexplain_vtgate.go | 8 ++++ 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/go/vt/vtadmin/api_authz_test.go b/go/vt/vtadmin/api_authz_test.go index 36edfee181..45d3e443c6 100644 --- a/go/vt/vtadmin/api_authz_test.go +++ b/go/vt/vtadmin/api_authz_test.go @@ -3209,7 +3209,8 @@ func testClusters(t testing.TB) []*cluster.Cluster { Keyspace: "test", Name: "-", Shard: &topodatapb.Shard{ - KeyRange: &topodatapb.KeyRange{}, + KeyRange: &topodatapb.KeyRange{}, + IsPrimaryServing: true, }, }, }, diff --git a/go/vt/vtadmin/testutil/authztestgen/config.json b/go/vt/vtadmin/testutil/authztestgen/config.json index 88470fe7e8..ac89d7f555 100644 --- a/go/vt/vtadmin/testutil/authztestgen/config.json +++ b/go/vt/vtadmin/testutil/authztestgen/config.json @@ -23,7 +23,7 @@ { "field": "FindAllShardsInKeyspaceResults", "type": "map[string]struct{\nResponse *vtctldatapb.FindAllShardsInKeyspaceResponse\nError error}", - "value": "\"test\": {\nResponse: &vtctldatapb.FindAllShardsInKeyspaceResponse{\nShards: map[string]*vtctldatapb.Shard{\n\"-\": {\nKeyspace: \"test\",\nName: \"-\",\nShard: &topodatapb.Shard{\nKeyRange: &topodatapb.KeyRange{},\n},\n},\n},\n},\n}," + "value": "\"test\": {\nResponse: &vtctldatapb.FindAllShardsInKeyspaceResponse{\nShards: map[string]*vtctldatapb.Shard{\n\"-\": {\nKeyspace: \"test\",\nName: \"-\",\nShard: &topodatapb.Shard{\nKeyRange: &topodatapb.KeyRange{},\nIsPrimaryServing: true,\n},\n},\n},\n},\n}," }, { "field": "GetBackupsResults", diff --git a/go/vt/vtexplain/vtexplain_test.go b/go/vt/vtexplain/vtexplain_test.go index 21fc30cbd4..8145c59b44 100644 --- a/go/vt/vtexplain/vtexplain_test.go +++ b/go/vt/vtexplain/vtexplain_test.go @@ -283,14 +283,14 @@ func TestJSONOutput(t *testing.T) { } } -func testShardInfo(ks, start, end string, t *testing.T) *topo.ShardInfo { +func testShardInfo(ks, start, end string, primaryServing bool, t *testing.T) *topo.ShardInfo { kr, err := key.ParseKeyRangeParts(start, end) require.NoError(t, err) return topo.NewShardInfo( ks, fmt.Sprintf("%s-%s", start, end), - &topodata.Shard{KeyRange: kr}, + &topodata.Shard{KeyRange: kr, IsPrimaryServing: primaryServing}, &vtexplainTestTopoVersion{}, ) } @@ -304,14 +304,17 @@ func TestUsingKeyspaceShardMap(t *testing.T) { testcase: "select-sharded-8", ShardRangeMap: map[string]map[string]*topo.ShardInfo{ "ks_sharded": { - "-20": testShardInfo("ks_sharded", "", "20", t), - "20-40": testShardInfo("ks_sharded", "20", "40", t), - "40-60": testShardInfo("ks_sharded", "40", "60", t), - "60-80": testShardInfo("ks_sharded", "60", "80", t), - "80-a0": testShardInfo("ks_sharded", "80", "a0", t), - "a0-c0": testShardInfo("ks_sharded", "a0", "c0", t), - "c0-e0": testShardInfo("ks_sharded", "c0", "e0", t), - "e0-": testShardInfo("ks_sharded", "e0", "", t), + "-20": testShardInfo("ks_sharded", "", "20", true, t), + "20-40": testShardInfo("ks_sharded", "20", "40", true, t), + "40-60": testShardInfo("ks_sharded", "40", "60", true, t), + "60-80": testShardInfo("ks_sharded", "60", "80", true, t), + "80-a0": testShardInfo("ks_sharded", "80", "a0", true, t), + "a0-c0": testShardInfo("ks_sharded", "a0", "c0", true, t), + "c0-e0": testShardInfo("ks_sharded", "c0", "e0", true, t), + "e0-": testShardInfo("ks_sharded", "e0", "", true, t), + // Some non-serving shards below - these should never be in the output of vtexplain + "-80": testShardInfo("ks_sharded", "", "80", false, t), + "80-": testShardInfo("ks_sharded", "80", "", false, t), }, }, }, @@ -321,11 +324,15 @@ func TestUsingKeyspaceShardMap(t *testing.T) { // Have mercy on the poor soul that has this keyspace sharding. // But, hey, vtexplain still works so they have that going for them. "ks_sharded": { - "-80": testShardInfo("ks_sharded", "", "80", t), - "80-90": testShardInfo("ks_sharded", "80", "90", t), - "90-a0": testShardInfo("ks_sharded", "90", "a0", t), - "a0-e8": testShardInfo("ks_sharded", "a0", "e8", t), - "e8-": testShardInfo("ks_sharded", "e8", "", t), + "-80": testShardInfo("ks_sharded", "", "80", true, t), + "80-90": testShardInfo("ks_sharded", "80", "90", true, t), + "90-a0": testShardInfo("ks_sharded", "90", "a0", true, t), + "a0-e8": testShardInfo("ks_sharded", "a0", "e8", true, t), + "e8-": testShardInfo("ks_sharded", "e8", "", true, t), + // Plus some un-even shards that are not serving and which should never be in the output of vtexplain + "80-a0": testShardInfo("ks_sharded", "80", "a0", false, t), + "a0-a5": testShardInfo("ks_sharded", "a0", "a5", false, t), + "a5-": testShardInfo("ks_sharded", "a5", "", false, t), }, }, }, diff --git a/go/vt/vtexplain/vtexplain_vtgate.go b/go/vt/vtexplain/vtexplain_vtgate.go index 7c1aa2dfdb..2a53d1f68a 100644 --- a/go/vt/vtexplain/vtexplain_vtgate.go +++ b/go/vt/vtexplain/vtexplain_vtgate.go @@ -131,6 +131,14 @@ func (vte *VTExplain) buildTopology(opts *Options, vschemaStr string, ksShardMap vte.explainTopo.KeyspaceShards[ks] = make(map[string]*topodatapb.ShardReference) for _, shard := range shards { + // If the topology is in the middle of a reshard, there can be two shards covering the same key range (e.g. + // both source shard 80- and target shard 80-c0 cover the keyrange 80-c0). For the purposes of explain, we + // should only consider the one that is serving, hence we skip the ones not serving. Otherwise, vtexplain + // gives inconsistent results - sometimes it will route the query being explained to the source shard, and + // sometimes to the destination shard. See https://github.com/vitessio/vitess/issues/11632 . + if shardInfo, ok := ksShardMap[ks][shard.Name]; ok && !shardInfo.IsPrimaryServing { + continue + } hostname := fmt.Sprintf("%s/%s", ks, shard.Name) log.Infof("registering test tablet %s for keyspace %s shard %s", hostname, ks, shard.Name)