2019-04-26 18:27:27 +03:00
|
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
# Licensed under the Apache 2.0 License.
|
|
|
|
import os
|
|
|
|
import getpass
|
|
|
|
import logging
|
|
|
|
import time
|
|
|
|
import math
|
2020-03-06 12:42:55 +03:00
|
|
|
import http
|
2019-04-26 18:27:27 +03:00
|
|
|
import infra.ccf
|
|
|
|
import infra.proc
|
2020-01-29 18:09:28 +03:00
|
|
|
import infra.e2e_args
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
from loguru import logger as LOG
|
|
|
|
|
|
|
|
# This test starts from a given number of nodes (hosts), commits
|
2019-08-15 19:52:43 +03:00
|
|
|
# a transaction, stops the current primary, waits for an election and repeats
|
|
|
|
# this process until no progress can be made (i.e. no primary can be elected
|
2019-04-26 18:27:27 +03:00
|
|
|
# as F > N/2).
|
|
|
|
|
|
|
|
|
|
|
|
def wait_for_index_globally_committed(index, term, nodes):
    """
    Wait for a specific version at a specific term to be committed on all nodes.

    Every node in ``nodes`` is polled with a "getCommit" request; rounds are
    one second apart and at most ``infra.ccf.Network.replication_delay``
    rounds are attempted.

    :param index: version (commit index) that each node must have globally
        committed.
    :param term: term that each node must report for ``index``.
    :param nodes: nodes to poll; each one must provide ``node_client()`` and
        ``node_id``.
    :raises AssertionError: if at least one node is still behind once all
        polling rounds have been used up.
    """
    # Bound before the loop so that the final assertion fails with its own
    # message (instead of an UnboundLocalError) when no polling round runs.
    up_to_date_f = []
    for _ in range(infra.ccf.Network.replication_delay):
        # Start from scratch on every round: a node only counts if it is up
        # to date in the same round as all the others.
        up_to_date_f = []
        for f in nodes:
            with f.node_client() as c:
                res = c.request("getCommit", {"commit": index})
                if res.result["term"] == term and (res.global_commit >= index):
                    up_to_date_f.append(f.node_id)
        if len(up_to_date_f) == len(nodes):
            break
        time.sleep(1)
    assert len(up_to_date_f) == len(
        nodes
    ), "Only {} out of {} backups are up to date".format(len(up_to_date_f), len(nodes))
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
|
|
|
|
def run(args):
    """
    Stop one primary after another until no primary can be elected any more.

    A local network is started (4 nodes for "pbft" consensus, 3 otherwise).
    In each round the current primary is looked up, a "LOG_record"
    transaction is committed through it, the commit is awaited on all joined
    nodes, the primary is stopped and the test sleeps for the election
    duration. After the last round, finding a primary is expected to raise
    ``infra.ccf.PrimaryNotFound``.

    :param args: parsed test arguments; this function reads ``consensus``,
        ``binary_dir``, ``debug_nodes``, ``perf_nodes``, ``pdb`` and
        ``election_timeout`` and forwards ``args`` to ``start_and_join``.
    """
    # Three nodes minimum to make sure that the raft network can still make
    # progress if one node stops
    node_count = 4 if args.consensus == "pbft" else 3
    hosts = ["localhost"] * node_count

    with infra.ccf.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        # Time before an election completes
        # (presumably election_timeout is in milliseconds, hence the
        # division by 1000 before sleeping - TODO confirm)
        max_election_duration = args.election_timeout * 4 // 1000

        # Number of nodes F to stop until network cannot make progress:
        # a third of the nodes for pbft, half of them otherwise (rounded up)
        fault_divisor = 3 if args.consensus == "pbft" else 2
        nodes_to_stop = math.ceil(len(hosts) / fault_divisor)

        for _ in range(nodes_to_stop):
            # Note that for the first iteration, the primary is known in
            # advance anyway
            LOG.debug("Find freshly elected primary")

            # After a view change in pbft, finding the new primary takes longer
            find_timeout = 30 if args.consensus == "pbft" else 3
            primary, current_term = network.find_primary(request_timeout=find_timeout)

            commit_message = "Commit new transactions, primary:{}, current_term:{}".format(
                primary.node_id, current_term
            )
            LOG.debug(commit_message)

            # The record is keyed by the term so each round writes its own entry
            record = {
                "id": current_term,
                "msg": "This log is committed in term {}".format(current_term),
            }
            commit_index = None
            with primary.user_client() as client:
                response = client.do(
                    "LOG_record", record, readonly_hint=None, expected_result=True
                )
                commit_index = response.commit

            LOG.debug("Waiting for transaction to be committed by all nodes")
            joined_nodes = network.get_joined_nodes()
            wait_for_index_globally_committed(commit_index, current_term, joined_nodes)

            LOG.debug("Stopping primary")
            primary.stop()

            LOG.debug("Waiting for a new primary to be elected...")
            time.sleep(max_election_duration)

        # More than F nodes have been stopped, trying to commit any message
        stalled_message = (
            "No progress can be made as more than {} nodes have stopped".format(
                nodes_to_stop
            )
        )
        LOG.debug(stalled_message)

        try:
            _unexpected_primary, _ = network.find_primary()
        except infra.ccf.PrimaryNotFound:
            # Expected outcome: too few nodes are left to elect a primary
            pass
        else:
            assert False, "Primary should not be found"

        LOG.info(
            "As expected, primary could not be found after election timeout. Test ended successfully."
        )
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the shared end-to-end test arguments, select
    # the "liblogging" package and run the election test with them.
    cli_args = infra.e2e_args.cli_args()
    cli_args.package = "liblogging"

    run(cli_args)
|