Add isolation/reconnection rotation test (#2586)

This commit is contained in:
Amaury Chamayou 2021-05-14 11:55:08 +01:00 коммит произвёл GitHub
Родитель 5f24ab38b9
Коммит 3774c0ac41
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 57 добавлений и 26 удалений

Просмотреть файл

@ -7,8 +7,8 @@ import infra.proc
import suite.test_requirements as reqs
import os
import subprocess
import reconfiguration
import hashlib
from infra.checker import check_can_progress
from loguru import logger as LOG
@ -41,7 +41,7 @@ def test_verify_quotes(network, args):
LOG.info("Check the network is stable")
primary, _ = network.find_primary()
reconfiguration.check_can_progress(primary)
check_can_progress(primary)
for node in network.get_joined_nodes():
LOG.info(f"Verifying quote for node {node.node_id}")
@ -157,7 +157,7 @@ def test_update_all_nodes(network, args):
node.stop()
LOG.info("Check the network is still functional")
reconfiguration.check_can_progress(new_node)
check_can_progress(new_node)
return network

Просмотреть файл

@ -2,6 +2,8 @@
# Licensed under the Apache 2.0 License.
from ccf.commit import wait_for_commit
from ccf.tx_id import TxID
import time
class Checker:
@ -34,3 +36,20 @@ class Checker:
if self.client:
wait_for_commit(self.client, rpc_result.seqno, rpc_result.view)
def check_can_progress(node, timeout=3):
    """Check that the commit point observed on *node* advances after a write.

    Reads the current transaction ID from ``/node/commit``, posts one entry
    to ``/app/log/private`` as ``user0``, then polls ``/node/commit`` every
    0.1 seconds until the reported seqno is strictly greater than the one
    read before the write.

    Args:
        node: Object exposing ``client()`` / ``client("user0")`` context
            managers whose clients provide ``get`` and ``post``.
        timeout: Maximum number of seconds to poll for.

    Returns:
        The first polled transaction ID whose seqno exceeds the original.

    Raises:
        AssertionError: If the seqno has not advanced within *timeout*.
    """
    with node.client() as c:
        r = c.get("/node/commit")
        original_tx = TxID.from_str(r.body.json()["transaction_id"])

        # The write goes through a separate user client, which is closed
        # again before polling starts.
        with node.client("user0") as uc:
            uc.post("/app/log/private", {"id": 42, "msg": "Hello world"})

        # A monotonic clock keeps the deadline immune to wall-clock jumps.
        end_time = time.monotonic() + timeout
        while time.monotonic() < end_time:
            current_tx = TxID.from_str(
                c.get("/node/commit").body.json()["transaction_id"]
            )
            if current_tx.seqno > original_tx.seqno:
                return current_tx
            time.sleep(0.1)

        # Raised explicitly rather than via `assert False`, which is removed
        # under `python -O` and would let a stalled node pass silently.
        raise AssertionError(f"Stuck at {r}")

Просмотреть файл

@ -6,6 +6,9 @@ import infra.e2e_args
import infra.partitions
import infra.logging_app as app
import suite.test_requirements as reqs
from ccf.tx_id import TxID
import time
from infra.checker import check_can_progress
@reqs.description("Invalid partitions are not allowed")
@ -66,7 +69,7 @@ def test_partition_majority(network, args):
@reqs.description("Isolate primary from one backup")
def test_isolate_primary(network, args):
def test_isolate_primary_from_one_backup(network, args):
primary, backups = network.find_nodes()
# Issue one transaction, waiting for all nodes to have reached
@ -103,6 +106,30 @@ def test_isolate_primary(network, args):
return network
@reqs.description("Isolate and reconnect primary")
def test_isolate_and_reconnect_primary(network, args):
    """Partition the backups, wait for a new primary, then check catch-up.

    While ``network.partitioner.partition(backups)`` is active, waits for a
    primary other than the current one to emerge among the backups and
    records the transaction ID returned by ``check_can_progress`` on it.
    Afterwards polls ``/node/commit`` on the former primary every 0.1
    seconds, for up to 5 seconds, until its seqno reaches that recorded one.

    Args:
        network: Network under test; provides ``find_nodes``,
            ``wait_for_new_primary`` and ``partitioner``.
        args: Test arguments (unused in this function).

    Returns:
        The *network* passed in, once the former primary has caught up.

    Raises:
        AssertionError: If the former primary has not caught up in time.
    """
    primary, backups = network.find_nodes()

    # NOTE(review): the nesting below is reconstructed because SOURCE lost
    # its indentation. The progress check is assumed to run while the
    # partition is still in place -- confirm against the repository.
    with network.partitioner.partition(backups):
        new_primary, _ = network.wait_for_new_primary(
            primary, nodes=backups, timeout_multiplier=6
        )
        new_tx = check_can_progress(new_primary)

    # Check reconnected former primary has caught up
    with primary.client() as c:
        r = c.get("/node/commit")
        timeout = 5
        # A monotonic clock keeps the deadline immune to wall-clock jumps.
        end_time = time.monotonic() + timeout
        while time.monotonic() < end_time:
            current_tx = TxID.from_str(
                c.get("/node/commit").body.json()["transaction_id"]
            )
            if current_tx.seqno >= new_tx.seqno:
                return network
            time.sleep(0.1)

        # Raised explicitly rather than via `assert False`, which is removed
        # under `python -O` and would make the test pass without catch-up.
        raise AssertionError(f"Stuck at {r}")
def run(args):
txs = app.LoggingTxs()
@ -119,7 +146,9 @@ def run(args):
# test_invalid_partitions(network, args)
test_partition_majority(network, args)
test_isolate_primary(network, args)
test_isolate_primary_from_one_backup(network, args)
for _ in range(5):
test_isolate_and_reconnect_primary(network, args)
if __name__ == "__main__":

Просмотреть файл

@ -4,12 +4,11 @@ import infra.e2e_args
import infra.network
import infra.proc
import infra.logging_app as app
from ccf.tx_id import TxID
import suite.test_requirements as reqs
import time
import tempfile
from shutil import copy
import os
from infra.checker import check_can_progress
from loguru import logger as LOG
@ -34,23 +33,6 @@ def count_nodes(configs, network):
return len(nodes)
def check_can_progress(node, timeout=3):
    """Check that the commit point observed on *node* advances after a write.

    Reads the current transaction ID from ``/node/commit``, posts one entry
    to ``/app/log/private`` as ``user0``, then polls ``/node/commit`` every
    0.1 seconds until the reported seqno is strictly greater than the one
    read before the write.

    Args:
        node: Object exposing ``client()`` / ``client("user0")`` context
            managers whose clients provide ``get`` and ``post``.
        timeout: Maximum number of seconds to poll for.

    Returns:
        None, as soon as the seqno is seen to have advanced.

    Raises:
        AssertionError: If the seqno has not advanced within *timeout*.
    """
    with node.client() as c:
        r = c.get("/node/commit")
        original_tx = TxID.from_str(r.body.json()["transaction_id"])

        # The write goes through a separate user client, which is closed
        # again before polling starts.
        with node.client("user0") as uc:
            uc.post("/app/log/private", {"id": 42, "msg": "Hello world"})

        # A monotonic clock keeps the deadline immune to wall-clock jumps.
        end_time = time.monotonic() + timeout
        while time.monotonic() < end_time:
            current_tx = TxID.from_str(
                c.get("/node/commit").body.json()["transaction_id"]
            )
            if current_tx.seqno > original_tx.seqno:
                return
            time.sleep(0.1)

        # Raised explicitly rather than via `assert False`, which is removed
        # under `python -O` and would let a stalled node pass silently.
        raise AssertionError(f"Stuck at {r}")
@reqs.description("Adding a valid node without snapshot")
def test_add_node(network, args):
new_node = network.create_and_trust_node(

Просмотреть файл

@ -5,6 +5,7 @@ import infra.network
import infra.proc
import suite.test_requirements as reqs
import reconfiguration
from infra.checker import check_can_progress
from loguru import logger as LOG
@ -15,9 +16,9 @@ def test_suspend_primary(network, args):
primary, _ = network.find_primary()
primary.suspend()
new_primary, _ = network.wait_for_new_primary(primary)
reconfiguration.check_can_progress(new_primary)
check_can_progress(new_primary)
primary.resume()
reconfiguration.check_can_progress(new_primary)
check_can_progress(new_primary)
return network