# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
import infra.e2e_args
import infra.network
import infra.proc
import infra.net
import infra.interfaces
import infra.proposal
import infra.logging_app as app
from infra.tx_status import TxStatus
import suite.test_requirements as reqs
import tempfile
from shutil import copy
from copy import deepcopy
import os
import time
import ccf.ledger
import json
import infra.crypto
from datetime import datetime
from infra.checker import check_can_progress
from governance_history import check_signatures
from infra.snp import IS_SNP
from infra.runner import ConcurrentRunner
import http
import random

from loguru import logger as LOG


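# Collect each joined node's view of the network, as reported by its
# /node/config endpoint; nodes that fail to respond are skipped.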
def node_configs(network):
    configs = {}
    for node in network.get_joined_nodes():
        try:
            with node.client() as nc:
                configs[node.node_id] = nc.get("/node/config").body.json()
        except Exception:
            pass
    return configs


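# Check that every config gathered by node_configs() agrees on the set of
# live nodes (ignoring stopped ones), and return how many nodes there are.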
def count_nodes(configs, network):
    nodes = set(str(k) for k in configs.keys())
    stopped = {str(n.node_id) for n in network.nodes if n.is_stopped()}
    for node_id, node_config in configs.items():
        nodes_in_config = set(node_config.keys()) - stopped
        assert nodes == nodes_in_config, f"{nodes} {nodes_in_config} {node_id}"
    return len(nodes)


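# Poll /node/consensus on all joined nodes until every node reports a single
# configuration and they all agree on its reconfiguration id (rid).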
def wait_for_reconfiguration_to_complete(network, timeout=10):
    max_num_configs = 0
    max_rid = 0
    all_same_rid = False
    end_time = time.time() + timeout
    while max_num_configs > 1 or not all_same_rid:
        max_num_configs = 0
        all_same_rid = True
        for node in network.get_joined_nodes():
            with node.client(verify_ca=False) as c:
                try:
                    r = c.get("/node/consensus")
                    rj = r.body.json()
                    cfgs = rj["details"]["configs"]
                    num_configs = len(cfgs)
                    max_num_configs = max(max_num_configs, num_configs)
                    if num_configs == 1 and cfgs[0]["rid"] != max_rid:
                        max_rid = max(max_rid, cfgs[0]["rid"])
                        all_same_rid = False
                except Exception as ex:
                    # OK, retiring node may be gone or a joining node may not be ready yet
                    LOG.info(f"expected RPC failure because of: {ex}")
        time.sleep(0.5)
        LOG.info(f"max num configs: {max_num_configs}, max rid: {max_rid}")
        assert time.time() <= end_time, "Reconfiguration did not complete in time"


@reqs.description("Adding a node with invalid target service certificate")
|
|
def test_add_node_invalid_service_cert(network, args):
|
|
primary, _ = network.find_primary()
|
|
|
|
# Incorrect target service certificate file, in this case the primary's node
|
|
# identity
|
|
service_cert_file = os.path.join(primary.common_dir, f"{primary.local_node_id}.pem")
|
|
new_node = network.create_node("local://localhost")
|
|
try:
|
|
network.join_node(
|
|
new_node,
|
|
args.package,
|
|
args,
|
|
service_cert_file=service_cert_file,
|
|
timeout=3,
|
|
stop_on_error=True,
|
|
)
|
|
except infra.network.ServiceCertificateInvalid:
|
|
LOG.info(
|
|
f"Node {new_node.local_node_id} with invalid service certificate failed to start, as expected"
|
|
)
|
|
else:
|
|
assert (
|
|
False
|
|
), f"Node {new_node.local_node_id} with invalid service certificate unexpectedly started"
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding a valid node")
|
|
def test_add_node(network, args, from_snapshot=True):
|
|
# Note: host is supplied explicitly to avoid having differently
|
|
# assigned IPs for the interfaces, something which the test infra doesn't
|
|
# support widely yet.
|
|
operator_rpc_interface = "operator_rpc_interface"
|
|
host = infra.net.expand_localhost()
|
|
new_node = network.create_node(
|
|
infra.interfaces.HostSpec(
|
|
rpc_interfaces={
|
|
infra.interfaces.PRIMARY_RPC_INTERFACE: infra.interfaces.RPCInterface(
|
|
host=host
|
|
),
|
|
operator_rpc_interface: infra.interfaces.RPCInterface(
|
|
host=host,
|
|
endorsement=infra.interfaces.Endorsement(
|
|
authority=infra.interfaces.EndorsementAuthority.Node
|
|
),
|
|
),
|
|
}
|
|
)
|
|
)
|
|
network.join_node(new_node, args.package, args, from_snapshot=from_snapshot)
|
|
|
|
# Verify self-signed node certificate validity period
|
|
new_node.verify_certificate_validity_period(interface_name=operator_rpc_interface)
|
|
|
|
network.trust_node(
|
|
new_node,
|
|
args,
|
|
validity_period_days=args.maximum_node_certificate_validity_days // 2,
|
|
)
|
|
|
|
if not from_snapshot:
|
|
with new_node.client() as c:
|
|
s = c.get("/node/state")
|
|
assert s.body.json()["node_id"] == new_node.node_id
|
|
assert (
|
|
s.body.json()["startup_seqno"] == 0
|
|
), "Node started without snapshot but reports startup seqno != 0"
|
|
|
|
# Now that the node is trusted, verify endorsed certificate validity period
|
|
new_node.verify_certificate_validity_period()
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding a node with an invalid certificate validity period")
|
|
def test_add_node_invalid_validity_period(network, args):
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(new_node, args.package, args)
|
|
try:
|
|
network.trust_node(
|
|
new_node,
|
|
args,
|
|
validity_period_days=args.maximum_node_certificate_validity_days + 1,
|
|
)
|
|
except infra.proposal.ProposalNotAccepted:
|
|
LOG.info(
|
|
"As expected, node could not be trusted since its certificate validity period is invalid"
|
|
)
|
|
else:
|
|
raise AssertionError(
|
|
"Node should not be trusted if its certificate validity period is invalid"
|
|
)
|
|
return network
|
|
|
|
|
|
def test_add_node_on_other_curve(network, args):
    original_curve = args.curve_id
    args.curve_id = (
        infra.network.EllipticCurve.secp256r1
        if original_curve is None
        else original_curve.next()
    )
    network = test_add_node(network, args)
    args.curve_id = original_curve
    return network


@reqs.description("Changing curve used for identity of new nodes and new services")
|
|
def test_change_curve(network, args):
|
|
# NB: This doesn't actually test things, it just changes the configuration
|
|
# for future tests. Expects to be part of an interesting suite
|
|
original_curve = args.curve_id
|
|
args.curve_id = (
|
|
infra.network.EllipticCurve.secp256r1
|
|
if original_curve is None
|
|
else original_curve.next()
|
|
)
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding a valid node from a backup")
|
|
@reqs.at_least_n_nodes(2)
|
|
def test_add_node_from_backup(network, args):
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(
|
|
new_node,
|
|
args.package,
|
|
args,
|
|
target_node=network.find_any_backup(),
|
|
)
|
|
network.trust_node(new_node, args)
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding a node with endorsements retrieved from remote server")
|
|
def test_add_node_endorsements_endpoints(network, args):
|
|
# By default, SEV-SNP endorsements are retrieved from the environment on ACI.
|
|
# However, we still want to support fetching those from a remote server, which is
|
|
# tested here
|
|
primary, _ = network.find_primary()
|
|
if not IS_SNP:
|
|
LOG.warning("Skipping test as running on non SEV-SNP")
|
|
return network
|
|
|
|
args_copy = deepcopy(args)
|
|
test_vectors = [
|
|
(["Azure:global.acccache.azure.net"], True),
|
|
(["AMD:kdsintf.amd.com"], True),
|
|
(["AMD:invalid.amd.com"], False),
|
|
(["Azure:invalid.azure.com", "AMD:kdsintf.amd.com"], True), # Fallback server
|
|
]
|
|
|
|
for servers, expected_result in test_vectors:
|
|
LOG.info(
|
|
f"Joining new node with endorsement server {servers} (expect success: {expected_result})"
|
|
)
|
|
new_node = network.create_node("local://localhost")
|
|
args_copy.snp_endorsements_servers = servers
|
|
try:
|
|
network.join_node(
|
|
new_node,
|
|
args.package,
|
|
args_copy,
|
|
set_snp_report_endorsements_envvar=None,
|
|
timeout=15,
|
|
)
|
|
except TimeoutError:
|
|
assert not expected_result
|
|
LOG.info(
|
|
f"Node with invalid quote endorsement servers {servers} could not join as expected"
|
|
)
|
|
else:
|
|
assert (
|
|
expected_result
|
|
), f"Node with invalid quote endorsement servers joined unexpectedly: {servers}"
|
|
network.retire_node(primary, new_node)
|
|
new_node.stop()
|
|
|
|
return network
|
|
|
|
|
|
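# The copy_ledger flag drives two distinct scenarios below: with the ledger
# copied, the new node must be able to serve historical entries; without it,
# historical queries for pre-snapshot transactions must stay ACCEPTED.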
@reqs.description("Adding a valid node from snapshot")
|
|
@reqs.at_least_n_nodes(2)
|
|
def test_add_node_from_snapshot(network, args, copy_ledger=True, from_backup=False):
|
|
# Before adding the node from a snapshot, override at least one app entry
|
|
# and wait for a new committed snapshot covering that entry, so that there
|
|
# is at least one historical entry to verify.
|
|
network.txs.issue(network, number_txs=1)
|
|
idx, historical_entry = network.txs.get_last_tx(priv=True)
|
|
network.txs.issue(network, number_txs=1, repeat=True)
|
|
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(
|
|
new_node,
|
|
args.package,
|
|
args,
|
|
copy_ledger=copy_ledger,
|
|
target_node=network.find_any_backup() if from_backup else None,
|
|
from_snapshot=True,
|
|
)
|
|
network.trust_node(new_node, args)
|
|
|
|
with new_node.client() as c:
|
|
r = c.get("/node/state")
|
|
assert (
|
|
r.body.json()["startup_seqno"] != 0
|
|
), "Node started from snapshot but reports startup seqno of 0"
|
|
|
|
# Finally, verify all app entries on the new node, including historical ones
|
|
# from the historical ledger and skip historical entries if ledger
|
|
# was not copied to node.
|
|
network.txs.verify(node=new_node, include_historical=copy_ledger)
|
|
|
|
# Check that historical entry can be retrieved (or not, if new node does not
|
|
# have access to historical ledger files).
|
|
try:
|
|
network.txs.verify_tx(
|
|
node=new_node,
|
|
idx=idx,
|
|
msg=historical_entry["msg"],
|
|
seqno=historical_entry["seqno"],
|
|
view=historical_entry["view"],
|
|
historical=True,
|
|
)
|
|
except infra.logging_app.LoggingTxsVerifyException:
|
|
assert (
|
|
not copy_ledger
|
|
), f"New node {new_node.local_node_id} without ledger should not be able to serve historical entries"
|
|
else:
|
|
assert (
|
|
copy_ledger
|
|
), f"New node {new_node.local_node_id} with ledger should be able to serve historical entries"
|
|
|
|
if not copy_ledger:
|
|
# Pick some sequence numbers before the snapshot the new node started from, and for which
|
|
# the new node does not have corresponding ledger chunks
|
|
missing_txids = []
|
|
with new_node.client("user0") as c:
|
|
r = c.get("/node/state")
|
|
assert r.status_code == http.HTTPStatus.OK, r
|
|
startup_seqno = r.body.json()["startup_seqno"]
|
|
assert startup_seqno != 0, startup_seqno
|
|
possible_seqno_range = range(1, startup_seqno)
|
|
num_samples = min(len(possible_seqno_range), 5)
|
|
missing_seqnos = sorted(random.sample(possible_seqno_range, num_samples))
|
|
LOG.info(f"Verifying status of transactions at seqnos: {missing_seqnos}")
|
|
view = 2
|
|
for seqno in missing_seqnos:
|
|
assert seqno != 0, "0 is not a valid seqno"
|
|
status = TxStatus.Invalid
|
|
while status == TxStatus.Invalid:
|
|
r = c.get(f"/node/tx?transaction_id={view}.{seqno}")
|
|
assert r.status_code == http.HTTPStatus.OK, r
|
|
status = TxStatus(r.body.json()["status"])
|
|
if status == TxStatus.Committed:
|
|
missing_txids.append(f"{view}.{seqno}")
|
|
else:
|
|
# Should never happen, because we're looking at seqnos for which there
|
|
# is a committed snapshot, and so are definitely committed.
|
|
assert status != TxStatus.Pending, status
|
|
view += 1
|
|
# Not likely to happen on purpose
|
|
assert view < 10, view
|
|
|
|
LOG.info("Check historical queries return ACCEPTED")
|
|
with new_node.client("user0") as c:
|
|
for txid in missing_txids:
|
|
# New node knows transactions are committed
|
|
rc = c.get(f"/node/tx?transaction_id={txid}")
|
|
status = TxStatus(r.body.json()["status"])
|
|
assert status == TxStatus.Committed
|
|
# But can't read their contents
|
|
rc = c.get(f"/app/receipt?transaction_id={txid}")
|
|
assert rc.status_code == http.HTTPStatus.ACCEPTED, rc
|
|
time.sleep(3)
|
|
# Not even after giving the host enough time
|
|
rc = c.get(f"/app/receipt?transaction_id={txid}")
|
|
assert rc.status_code == http.HTTPStatus.ACCEPTED, rc
|
|
|
|
primary, _ = network.find_primary()
|
|
network.retire_node(primary, new_node)
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding as many pending nodes as current number of nodes")
|
|
@reqs.supports_methods("/app/log/private")
|
|
def test_add_as_many_pending_nodes(network, args):
|
|
# Killing pending nodes should not change the raft consensus rules
|
|
primary, _ = network.find_primary()
|
|
number_new_nodes = len(network.nodes)
|
|
LOG.info(
|
|
f"Adding {number_new_nodes} pending nodes - consensus rules should not change"
|
|
)
|
|
|
|
new_nodes = []
|
|
for _ in range(number_new_nodes):
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(new_node, args.package, args)
|
|
new_nodes.append(new_node)
|
|
|
|
for new_node in new_nodes:
|
|
new_node.stop()
|
|
|
|
# Even though pending nodes (half the number of nodes) are stopped,
|
|
# service can still make progress
|
|
check_can_progress(primary)
|
|
|
|
# Cleanup killed pending nodes
|
|
for new_node in new_nodes:
|
|
network.retire_node(primary, new_node)
|
|
|
|
wait_for_reconfiguration_to_complete(network)
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Retiring a backup")
|
|
@reqs.at_least_n_nodes(2)
|
|
@reqs.can_kill_n_nodes(1)
|
|
def test_retire_backup(network, args):
|
|
primary, _ = network.find_primary()
|
|
backup_to_retire = network.find_any_backup()
|
|
network.retire_node(primary, backup_to_retire)
|
|
backup_to_retire.stop()
|
|
check_can_progress(primary)
|
|
wait_for_reconfiguration_to_complete(network)
|
|
return network
|
|
|
|
|
|
@reqs.description("Retiring the primary")
|
|
@reqs.can_kill_n_nodes(1)
|
|
def test_retire_primary(network, args):
|
|
pre_count = count_nodes(node_configs(network), network)
|
|
|
|
primary, backup = network.find_primary_and_any_backup()
|
|
network.retire_node(primary, primary, timeout=15)
|
|
# Query this backup to find the new primary. If we ask any other
|
|
# node, then this backup may not know the new primary by the
|
|
# time we call check_can_progress.
|
|
new_primary, _ = network.wait_for_new_primary(primary, nodes=[backup])
|
|
# See https://github.com/microsoft/CCF/issues/1713
|
|
check_can_progress(new_primary)
|
|
check_can_progress(backup)
|
|
post_count = count_nodes(node_configs(network), network)
|
|
assert pre_count == post_count + 1
|
|
primary.stop()
|
|
wait_for_reconfiguration_to_complete(network)
|
|
return network
|
|
|
|
|
|
@reqs.description("Test node filtering by status")
|
|
def test_node_filter(network, args):
|
|
primary, _ = network.find_primary_and_any_backup()
|
|
with primary.client() as c:
|
|
|
|
def get_nodes(status):
|
|
r = c.get(f"/node/network/nodes?status={status}")
|
|
nodes = r.body.json()["nodes"]
|
|
# Primary may change during operation, so do not check for primary equality
|
|
for node in nodes:
|
|
del node["primary"]
|
|
return sorted(nodes, key=lambda node: node["node_id"])
|
|
|
|
trusted_before = get_nodes("Trusted")
|
|
pending_before = get_nodes("Pending")
|
|
retired_before = get_nodes("Retired")
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(new_node, args.package, args, target_node=primary)
|
|
trusted_after = get_nodes("Trusted")
|
|
pending_after = get_nodes("Pending")
|
|
retired_after = get_nodes("Retired")
|
|
assert trusted_before == trusted_after, (trusted_before, trusted_after)
|
|
assert len(pending_before) + 1 == len(pending_after), (
|
|
pending_before,
|
|
pending_after,
|
|
)
|
|
assert retired_before == retired_after, (retired_before, retired_after)
|
|
|
|
assert all(info["status"] == "Trusted" for info in trusted_after), trusted_after
|
|
assert all(info["status"] == "Pending" for info in pending_after), pending_after
|
|
assert all(info["status"] == "Retired" for info in retired_after), retired_after
|
|
return network
|
|
|
|
|
|
@reqs.description("Get node CCF version")
|
|
def test_version(network, args):
|
|
if args.ccf_version is None:
|
|
LOG.warning(
|
|
"Skipping network version check as no expected version is specified"
|
|
)
|
|
return
|
|
|
|
nodes = network.get_joined_nodes()
|
|
|
|
for node in nodes:
|
|
with node.client() as c:
|
|
r = c.get("/node/version")
|
|
assert r.body.json()["ccf_version"] == args.ccf_version
|
|
assert r.body.json()["unsafe"] == os.path.exists(
|
|
os.path.join(args.binary_dir, "UNSAFE")
|
|
)
|
|
|
|
|
|
@reqs.description("Issue fake join requests as untrusted client")
|
|
def test_issue_fake_join(network, args):
|
|
primary, _ = network.find_primary()
|
|
|
|
# Assemble dummy join request body
|
|
net = {"bind_address": "0:0"}
|
|
req = {}
|
|
req["node_info_network"] = {
|
|
"node_to_node_interface": net,
|
|
"rpc_interfaces": {"name": net},
|
|
}
|
|
req["consensus_type"] = "CFT"
|
|
req["startup_seqno"] = 0
|
|
with open(
|
|
os.path.join(network.common_dir, "member0_enc_pubk.pem"), "r", encoding="utf-8"
|
|
) as f:
|
|
req["public_encryption_key"] = f.read()
|
|
|
|
with primary.client(identity="user0") as c:
|
|
# First, retrieve real quote from primary node
|
|
own_quote = c.get("/node/quotes/self").body.json()
|
|
|
|
LOG.info("Join with SGX dummy quote")
|
|
req["quote_info"] = {"format": "OE_SGX_v1", "quote": "", "endorsements": ""}
|
|
r = c.post("/node/join", body=req)
|
|
assert r.status_code == http.HTTPStatus.UNAUTHORIZED
|
|
assert (
|
|
r.body.json()["error"]["code"] == "InvalidQuote"
|
|
), "Quote verification should fail when OE_SGX_v1 is specified"
|
|
|
|
LOG.info("Join with SGX real quote, but different TLS key")
|
|
req["quote_info"] = {
|
|
"format": "OE_SGX_v1",
|
|
"quote": own_quote["raw"],
|
|
"endorsements": own_quote["endorsements"],
|
|
}
|
|
r = c.post("/node/join", body=req)
|
|
assert r.status_code == http.HTTPStatus.UNAUTHORIZED
|
|
assert r.body.json()["error"]["code"] == "InvalidQuote"
|
|
if args.enclave_platform != "sgx":
|
|
assert r.body.json()["error"]["message"] == "Quote could not be verified"
|
|
else:
|
|
assert (
|
|
r.body.json()["error"]["message"]
|
|
== "Quote report data does not contain node's public key hash"
|
|
)
|
|
|
|
LOG.info("Join with AMD SEV-SNP quote")
|
|
req["quote_info"] = {
|
|
"format": "AMD_SEV_SNP_v1",
|
|
"quote": own_quote["raw"],
|
|
"endorsements": own_quote["endorsements"],
|
|
}
|
|
r = c.post("/node/join", body=req)
|
|
if args.enclave_platform != "snp":
|
|
assert r.status_code == http.HTTPStatus.UNAUTHORIZED
|
|
assert r.body.json()["error"]["code"] == "InvalidQuote"
|
|
assert r.body.json()["error"]["message"] == "Quote could not be verified"
|
|
else:
|
|
assert (
|
|
r.body.json()["error"]["message"]
|
|
== "Quote report data does not contain node's public key hash"
|
|
)
|
|
|
|
LOG.info("Join with virtual quote")
|
|
req["quote_info"] = {
|
|
"format": "Insecure_Virtual",
|
|
"quote": "",
|
|
"endorsements": "",
|
|
}
|
|
r = c.post("/node/join", body=req)
|
|
if args.enclave_platform == "virtual":
|
|
assert r.status_code == http.HTTPStatus.OK
|
|
assert r.body.json()["node_status"] == ccf.ledger.NodeStatus.PENDING.value
|
|
else:
|
|
assert r.status_code == http.HTTPStatus.UNAUTHORIZED
|
|
assert (
|
|
r.body.json()["error"]["code"] == "InvalidQuote"
|
|
), "Virtual node must never join non-virtual network"
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Replace a node on the same addresses")
|
|
@reqs.can_kill_n_nodes(1)
|
|
def test_node_replacement(network, args):
|
|
primary, backups = network.find_nodes()
|
|
|
|
node_to_replace = backups[-1]
|
|
LOG.info(f"Retiring node {node_to_replace.local_node_id}")
|
|
network.retire_node(primary, node_to_replace)
|
|
node_to_replace.stop()
|
|
check_can_progress(primary)
|
|
|
|
LOG.info("Adding one node on same address as retired node")
|
|
replacement_node = network.create_node(
|
|
f"local://{node_to_replace.get_public_rpc_host()}:{node_to_replace.get_public_rpc_port()}",
|
|
node_port=node_to_replace.n2n_interface.port,
|
|
)
|
|
network.join_node(replacement_node, args.package, args)
|
|
network.trust_node(replacement_node, args)
|
|
|
|
assert replacement_node.node_id != node_to_replace.node_id
|
|
assert (
|
|
replacement_node.get_public_rpc_host() == node_to_replace.get_public_rpc_host()
|
|
)
|
|
assert replacement_node.n2n_interface.port == node_to_replace.n2n_interface.port
|
|
assert (
|
|
replacement_node.get_public_rpc_port() == node_to_replace.get_public_rpc_port()
|
|
)
|
|
|
|
allowed_to_suspend_count = network.get_f() - len(network.get_stopped_nodes())
|
|
backups_to_suspend = backups[:allowed_to_suspend_count]
|
|
LOG.info(
|
|
f"Suspending {len(backups_to_suspend)} other nodes to make progress depend on the replacement"
|
|
)
|
|
for other_backup in backups_to_suspend:
|
|
other_backup.suspend()
|
|
# Confirm the network can make progress
|
|
check_can_progress(primary)
|
|
for other_backup in backups_to_suspend:
|
|
other_backup.resume()
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Join straddling a primary retirement")
|
|
@reqs.at_least_n_nodes(3)
|
|
def test_join_straddling_primary_replacement(network, args):
|
|
# We need a fourth node before we attempt the replacement, otherwise
|
|
# we will reach a situation where two out four nodes in the voting quorum
|
|
# are unable to participate (one retired and one not yet joined).
|
|
test_add_node(network, args)
|
|
primary, _ = network.find_primary()
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(new_node, args.package, args)
|
|
proposal_body = {
|
|
"actions": [
|
|
{
|
|
"name": "transition_node_to_trusted",
|
|
"args": {
|
|
"node_id": new_node.node_id,
|
|
"valid_from": str(datetime.utcnow()),
|
|
},
|
|
},
|
|
{
|
|
"name": "remove_node",
|
|
"args": {"node_id": primary.node_id},
|
|
},
|
|
]
|
|
}
|
|
|
|
proposal = network.consortium.get_any_active_member().propose(
|
|
primary, proposal_body
|
|
)
|
|
network.consortium.vote_using_majority(
|
|
primary,
|
|
proposal,
|
|
{"ballot": "export function vote (proposal, proposer_id) { return true }"},
|
|
timeout=10,
|
|
)
|
|
|
|
network.wait_for_new_primary(primary)
|
|
new_node.wait_for_node_to_join(timeout=10)
|
|
|
|
primary.stop()
|
|
network.nodes.remove(primary)
|
|
wait_for_reconfiguration_to_complete(network)
|
|
return network
|
|
|
|
|
|
@reqs.description("Test retired nodes have emitted at most one signature")
|
|
def test_retiring_nodes_emit_at_most_one_signature(network, args):
|
|
primary, _ = network.find_primary()
|
|
|
|
# Force ledger flush of all transactions so far
|
|
network.get_latest_ledger_public_state()
|
|
ledger = ccf.ledger.Ledger(primary.remote.ledger_paths())
|
|
|
|
retiring_nodes = set()
|
|
retired_nodes = set()
|
|
for chunk in ledger:
|
|
for tr in chunk:
|
|
tables = tr.get_public_domain().get_tables()
|
|
if ccf.ledger.NODES_TABLE_NAME in tables:
|
|
nodes = tables[ccf.ledger.NODES_TABLE_NAME]
|
|
for nid, info_ in nodes.items():
|
|
if info_ is None:
|
|
# Node was removed
|
|
continue
|
|
info = json.loads(info_)
|
|
if info["status"] == "Retired":
|
|
retiring_nodes.add(nid)
|
|
|
|
if ccf.ledger.SIGNATURE_TX_TABLE_NAME in tables:
|
|
sigs = tables[ccf.ledger.SIGNATURE_TX_TABLE_NAME]
|
|
assert len(sigs) == 1, sigs.keys()
|
|
(sig_,) = sigs.values()
|
|
sig = json.loads(sig_)
|
|
assert (
|
|
sig["node"] not in retired_nodes
|
|
), f"Unexpected signature from {sig['node']}"
|
|
retired_nodes |= retiring_nodes
|
|
retiring_nodes = set()
|
|
|
|
assert not retiring_nodes, (retiring_nodes, retired_nodes)
|
|
LOG.info("{} nodes retired throughout test", len(retired_nodes))
|
|
|
|
wait_for_reconfiguration_to_complete(network)
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Adding a learner without snapshot")
|
|
def test_learner_catches_up(network, args):
|
|
primary, _ = network.find_primary()
|
|
num_nodes_before = 0
|
|
|
|
with primary.client() as c:
|
|
s = c.get("/node/consensus")
|
|
rj = s.body.json()
|
|
# At this point, there should be exactly one configuration
|
|
assert len(rj["details"]["configs"]) == 1
|
|
c0 = rj["details"]["configs"][0]["nodes"]
|
|
num_nodes_before = len(c0)
|
|
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(new_node, args.package, args)
|
|
network.trust_node(new_node, args)
|
|
|
|
with new_node.client() as c:
|
|
s = c.get("/node/network/nodes/self")
|
|
rj = s.body.json()
|
|
assert rj["status"] == "Learner" or rj["status"] == "Trusted"
|
|
|
|
network.wait_for_node_in_store(
|
|
primary,
|
|
new_node.node_id,
|
|
node_status=(ccf.ledger.NodeStatus.TRUSTED),
|
|
timeout=3,
|
|
)
|
|
|
|
with primary.client() as c:
|
|
s = c.get("/node/consensus")
|
|
rj = s.body.json()
|
|
assert len(rj["details"]["learners"]) == 0
|
|
|
|
# At this point, there should be exactly one configuration, which includes the new node.
|
|
assert len(rj["details"]["configs"]) == 1
|
|
c0 = rj["details"]["configs"][0]["nodes"]
|
|
assert len(c0) == num_nodes_before + 1
|
|
assert new_node.node_id in c0
|
|
|
|
return network
|
|
|
|
|
|
@reqs.description("Test node certificates validity period")
|
|
def test_node_certificates_validity_period(network, args):
|
|
for node in network.get_joined_nodes():
|
|
node.verify_certificate_validity_period()
|
|
return network
|
|
|
|
|
|
@reqs.description("Add a new node without a snapshot but with the historical ledger")
|
|
def test_add_node_with_read_only_ledger(network, args):
|
|
network.txs.issue(network, number_txs=10)
|
|
network.txs.issue(network, number_txs=2, repeat=True)
|
|
|
|
new_node = network.create_node("local://localhost")
|
|
network.join_node(
|
|
new_node, args.package, args, from_snapshot=False, copy_ledger=True
|
|
)
|
|
network.trust_node(new_node, args)
|
|
return network
|
|
|
|
|
|
@reqs.description("Test reconfiguration type in service config")
|
|
def test_service_config_endpoint(network, args):
|
|
for n in network.get_joined_nodes():
|
|
with n.client() as c:
|
|
r = c.get("/node/service/configuration")
|
|
rj = r.body.json()
|
|
assert args.reconfiguration_type == rj["reconfiguration_type"]
|
|
|
|
|
|
@reqs.description("Confirm ledger contains expected entries")
|
|
def test_ledger_invariants(network, args):
|
|
# Force ledger flush of all transactions so far
|
|
network.get_latest_ledger_public_state()
|
|
|
|
for node in network.nodes:
|
|
LOG.info(f"Examining ledger on node {node.local_node_id}")
|
|
ledger_directories = node.remote.ledger_paths()
|
|
ledger = ccf.ledger.Ledger(ledger_directories)
|
|
check_signatures(ledger)
|
|
|
|
return network
|
|
|
|
|
|
def run_all(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_open(args)

        test_version(network, args)
        test_issue_fake_join(network, args)

        test_add_as_many_pending_nodes(network, args)
        test_add_node_invalid_service_cert(network, args)
        test_add_node(network, args, from_snapshot=False)
        test_add_node_with_read_only_ledger(network, args)
        test_join_straddling_primary_replacement(network, args)
        test_node_replacement(network, args)
        test_add_node_from_backup(network, args)
        test_add_node_endorsements_endpoints(network, args)
        test_add_node_on_other_curve(network, args)
        test_retire_backup(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        test_add_node_from_snapshot(network, args)
        test_add_node_from_snapshot(network, args, from_backup=True)
        test_add_node_from_snapshot(network, args, copy_ledger=False)

        test_node_filter(network, args)
        test_retiring_nodes_emit_at_most_one_signature(network, args)

        if args.reconfiguration_type == "TwoTransaction":
            test_learner_catches_up(network, args)

        test_service_config_endpoint(network, args)
        test_node_certificates_validity_period(network, args)
        test_add_node_invalid_validity_period(network, args)

        test_ledger_invariants(network, args)

    run_join_old_snapshot(args)


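# Scenario: once the service has moved on to a newer committed snapshot, a
# node started from the stale snapshot saved earlier (or from no snapshot at
# all) must be rejected with StartupSeqnoIsOld.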
def run_join_old_snapshot(args):
    txs = app.LoggingTxs("user0")
    nodes = ["local://localhost"]

    with tempfile.TemporaryDirectory() as tmp_dir:
        with infra.network.network(
            nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
        ) as network:
            network.start_and_open(args)
            primary, _ = network.find_primary()

            # First, retrieve and save one committed snapshot
            txs.issue(network, number_txs=args.snapshot_tx_interval)
            old_committed_snapshots = network.get_committed_snapshots(primary)
            copy(
                os.path.join(
                    old_committed_snapshots, os.listdir(old_committed_snapshots)[0]
                ),
                tmp_dir,
            )

            # Then generate another, newer snapshot, and add two more nodes from it
            txs.issue(network, number_txs=args.snapshot_tx_interval)

            for _ in range(0, 2):
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                )
                network.trust_node(new_node, args)

            # Kill primary and wait for a new one: the new primary is
            # guaranteed to have started from the new snapshot
            primary.stop()
            network.wait_for_new_primary(primary)

            # Start new node from the old snapshot
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=True,
                    snapshots_dir=tmp_dir,
                    timeout=3,
                )
            except infra.network.StartupSeqnoIsOld:
                LOG.info(
                    f"Node {new_node.local_node_id} started from old snapshot could not join the service, as expected"
                )
            else:
                raise RuntimeError(
                    f"Node {new_node.local_node_id} started from old snapshot unexpectedly joined the service"
                )

            # Start new node from no snapshot
            try:
                new_node = network.create_node("local://localhost")
                network.join_node(
                    new_node,
                    args.package,
                    args,
                    from_snapshot=False,
                    timeout=3,
                )
            except infra.network.StartupSeqnoIsOld:
                LOG.info(
                    f"Node {new_node.local_node_id} started without snapshot could not join the service, as expected"
                )
            else:
                raise RuntimeError(
                    f"Node {new_node.local_node_id} started without snapshot unexpectedly joined the service"
                )


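# Decode the public:ccf.gov.nodes.info table from the latest public ledger
# state into a plain {node_id: info} dict.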
def get_current_nodes_table(network):
    tables, _ = network.get_latest_ledger_public_state()
    tn = "public:ccf.gov.nodes.info"
    r = {}
    for nid, info in tables[tn].items():
        r[nid.decode()] = json.loads(info)
    return r


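# Walk the ledger (including uncommitted chunks) and record the seqnos at
# which the given node was marked Pending, Learner and Trusted; under 2tx
# reconfiguration these transitions must appear in that order.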
def check_2tx_ledger(ledger_paths, learner_id):
    pending_at = 0
    learner_at = 0
    trusted_at = 0

    ledger = ccf.ledger.Ledger(ledger_paths, committed_only=False)

    for chunk in ledger:
        for tr in chunk:
            tables = tr.get_public_domain().get_tables()
            if ccf.ledger.NODES_TABLE_NAME in tables:
                nodes = tables[ccf.ledger.NODES_TABLE_NAME]
                for nid, info_ in nodes.items():
                    info = json.loads(info_)
                    if nid.decode() == learner_id and "status" in info:
                        seq_no = tr.get_public_domain().get_seqno()
                        if info["status"] == "Pending":
                            pending_at = seq_no
                        elif info["status"] == "Learner":
                            learner_at = seq_no
                        elif info["status"] == "Trusted":
                            trusted_at = seq_no

    assert pending_at < learner_at < trusted_at


@reqs.description("Migrate from 1tx to 2tx reconfiguration scheme")
|
|
def test_migration_2tx_reconfiguration(
|
|
network, args, initial_is_1tx=True, valid_from=None, **kwargs
|
|
):
|
|
primary, _ = network.find_primary()
|
|
|
|
# Check that the service config agrees that this is a 1tx network
|
|
with primary.client() as c:
|
|
s = c.get("/node/service/configuration").body.json()
|
|
if initial_is_1tx:
|
|
assert s["reconfiguration_type"] == "OneTransaction"
|
|
|
|
network.consortium.submit_2tx_migration_proposal(primary)
|
|
network.wait_for_all_nodes_to_commit(primary)
|
|
|
|
# Check that the service config has been updated
|
|
with primary.client() as c:
|
|
rj = c.get("/node/service/configuration").body.json()
|
|
assert rj["reconfiguration_type"] == "TwoTransaction"
|
|
|
|
# Check that all nodes have updated their consensus parameters
|
|
for node in network.nodes:
|
|
with node.client() as c:
|
|
rj = c.get("/node/consensus").body.json()
|
|
assert "reconfiguration_type" in rj["details"]
|
|
assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
|
|
assert len(rj["details"]["learners"]) == 0
|
|
|
|
new_node = network.create_node("local://localhost", **kwargs)
|
|
network.join_node(new_node, args.package, args)
|
|
network.trust_node(new_node, args, valid_from=valid_from)
|
|
|
|
# Check that the new node has the right consensus parameter
|
|
with new_node.client() as c:
|
|
rj = c.get("/node/consensus").body.json()
|
|
assert "reconfiguration_type" in rj["details"]
|
|
assert "learners" in rj["details"]
|
|
assert rj["details"]["reconfiguration_type"] == "TwoTransaction"
|
|
assert len(rj["details"]["learners"]) == 0
|
|
|
|
|
|
def run_migration_tests(args):
    if args.reconfiguration_type != "OneTransaction":
        return

    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        network.start_and_open(args)
        test_migration_2tx_reconfiguration(network, args)
        primary, _ = network.find_primary()
        new_node = network.nodes[-1]

        ledger_paths = primary.remote.ledger_paths()
        learner_id = new_node.node_id

        check_2tx_ledger(ledger_paths, learner_id)


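# Entry point: always runs the full suite against a one-transaction service
# plus the 1tx->2tx migration test; the two-transaction variant is opt-in
# via --include-2tx-reconfig.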
if __name__ == "__main__":
|
|
|
|
def add(parser):
|
|
parser.add_argument(
|
|
"--include-2tx-reconfig",
|
|
help="Include tests for the 2-transaction reconfiguration scheme",
|
|
default=False,
|
|
action="store_true",
|
|
)
|
|
|
|
cr = ConcurrentRunner(add)
|
|
|
|
cr.add(
|
|
"1tx_reconfig",
|
|
run_all,
|
|
package="samples/apps/logging/liblogging",
|
|
nodes=infra.e2e_args.min_nodes(cr.args, f=1),
|
|
reconfiguration_type="OneTransaction",
|
|
)
|
|
|
|
if cr.args.include_2tx_reconfig:
|
|
cr.add(
|
|
"2tx_reconfig",
|
|
run_all,
|
|
package="samples/apps/logging/liblogging",
|
|
nodes=infra.e2e_args.min_nodes(cr.args, f=1),
|
|
reconfiguration_type="TwoTransaction",
|
|
)
|
|
|
|
cr.add(
|
|
"migration",
|
|
run_migration_tests,
|
|
package="samples/apps/logging/liblogging",
|
|
nodes=infra.e2e_args.min_nodes(cr.args, f=1),
|
|
reconfiguration_type="OneTransaction",
|
|
)
|
|
|
|
cr.run()
|