зеркало из https://github.com/microsoft/CCF.git
Generate snapshots by default (#2029)
This commit is contained in:
Родитель
7d11a18993
Коммит
016d5441b5
|
@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
|
||||||
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
||||||
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Snapshots are generated by default on the current primary node, every `10,000` committed transactions (#2029).
|
||||||
|
|
||||||
## [0.16.1]
|
## [0.16.1]
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
@ -515,22 +515,6 @@ if(BUILD_TESTS)
|
||||||
4000
|
4000
|
||||||
)
|
)
|
||||||
|
|
||||||
add_e2e_test(
|
|
||||||
NAME recovery_snapshot_test
|
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py
|
|
||||||
CONSENSUS cft
|
|
||||||
ADDITIONAL_ARGS
|
|
||||||
--recovery
|
|
||||||
2
|
|
||||||
# Shorten Raft election timeout to speed up test when it kills a node on
|
|
||||||
# purpose to check that a recovery network is robust to a view change.
|
|
||||||
--raft-election-timeout
|
|
||||||
4000
|
|
||||||
--snapshot-tx-interval
|
|
||||||
5
|
|
||||||
--use-snapshot
|
|
||||||
)
|
|
||||||
|
|
||||||
add_e2e_test(
|
add_e2e_test(
|
||||||
NAME rekey_test
|
NAME rekey_test
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rekey.py
|
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rekey.py
|
||||||
|
@ -559,7 +543,7 @@ if(BUILD_TESTS)
|
||||||
LABEL suite
|
LABEL suite
|
||||||
ADDITIONAL_ARGS
|
ADDITIONAL_ARGS
|
||||||
--test-duration
|
--test-duration
|
||||||
150
|
200
|
||||||
--enforce-reqs
|
--enforce-reqs
|
||||||
--test-suite
|
--test-suite
|
||||||
reconfiguration
|
reconfiguration
|
||||||
|
@ -567,23 +551,6 @@ if(BUILD_TESTS)
|
||||||
4000
|
4000
|
||||||
)
|
)
|
||||||
|
|
||||||
add_e2e_test(
|
|
||||||
NAME snapshots_test_suite
|
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py
|
|
||||||
CONSENSUS cft
|
|
||||||
LABEL suite
|
|
||||||
ADDITIONAL_ARGS
|
|
||||||
--test-duration
|
|
||||||
150
|
|
||||||
--enforce-reqs
|
|
||||||
--test-suite
|
|
||||||
snapshots
|
|
||||||
--raft-election-timeout
|
|
||||||
4000
|
|
||||||
--snapshot-tx-interval
|
|
||||||
5
|
|
||||||
)
|
|
||||||
|
|
||||||
add_e2e_test(
|
add_e2e_test(
|
||||||
NAME full_test_suite
|
NAME full_test_suite
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py
|
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py
|
||||||
|
@ -680,13 +647,6 @@ if(BUILD_TESTS)
|
||||||
ADDITIONAL_ARGS --raft-election-timeout 4000
|
ADDITIONAL_ARGS --raft-election-timeout 4000
|
||||||
)
|
)
|
||||||
|
|
||||||
add_e2e_test(
|
|
||||||
NAME reconfiguration_snapshot_test
|
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py
|
|
||||||
CONSENSUS cft
|
|
||||||
ADDITIONAL_ARGS --snapshot-tx-interval 10 --raft-election-timeout 4000
|
|
||||||
)
|
|
||||||
|
|
||||||
add_e2e_test(
|
add_e2e_test(
|
||||||
NAME code_update_test
|
NAME code_update_test
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
|
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py
|
||||||
|
@ -744,11 +704,13 @@ if(BUILD_TESTS)
|
||||||
)
|
)
|
||||||
|
|
||||||
if(NOT SAN)
|
if(NOT SAN)
|
||||||
|
# Writing new ledger files and generating new snapshots uses more file
|
||||||
|
# descriptors so disable those for this test
|
||||||
add_e2e_test(
|
add_e2e_test(
|
||||||
NAME connections_cft
|
NAME connections_cft
|
||||||
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py
|
PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py
|
||||||
CONSENSUS cft
|
CONSENSUS cft
|
||||||
ADDITIONAL_ARGS --ledger-chunk-bytes 100Mib
|
ADDITIONAL_ARGS --ledger-chunk-bytes 100Mib --snapshot-tx-interval 10000
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -842,7 +804,7 @@ if(BUILD_TESTS)
|
||||||
--max-writes-ahead
|
--max-writes-ahead
|
||||||
1000
|
1000
|
||||||
--repetitions
|
--repetitions
|
||||||
1000
|
10000
|
||||||
--msg-ser-fmt
|
--msg-ser-fmt
|
||||||
msgpack
|
msgpack
|
||||||
)
|
)
|
||||||
|
@ -881,7 +843,7 @@ if(BUILD_TESTS)
|
||||||
--max-writes-ahead
|
--max-writes-ahead
|
||||||
1000
|
1000
|
||||||
--repetitions
|
--repetitions
|
||||||
800
|
1000
|
||||||
--msg-ser-fmt
|
--msg-ser-fmt
|
||||||
text
|
text
|
||||||
)
|
)
|
||||||
|
|
|
@ -546,7 +546,8 @@ function(add_perf_test)
|
||||||
${PYTHON} ${PARSED_ARGS_PYTHON_SCRIPT} -b . -c ${PARSED_ARGS_CLIENT_BIN}
|
${PYTHON} ${PARSED_ARGS_PYTHON_SCRIPT} -b . -c ${PARSED_ARGS_CLIENT_BIN}
|
||||||
${CCF_NETWORK_TEST_ARGS} --consensus ${PARSED_ARGS_CONSENSUS} -g
|
${CCF_NETWORK_TEST_ARGS} --consensus ${PARSED_ARGS_CONSENSUS} -g
|
||||||
${PARSED_ARGS_GOV_SCRIPT} --write-tx-times ${VERIFICATION_ARG} --label
|
${PARSED_ARGS_GOV_SCRIPT} --write-tx-times ${VERIFICATION_ARG} --label
|
||||||
${LABEL_ARG} ${PARSED_ARGS_ADDITIONAL_ARGS} ${NODES}
|
${LABEL_ARG} --snapshot-tx-interval 10000 ${PARSED_ARGS_ADDITIONAL_ARGS}
|
||||||
|
${NODES}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Make python test client framework importable
|
# Make python test client framework importable
|
||||||
|
|
|
@ -50,9 +50,7 @@ To avoid this, it is possible for a new node to be added (or a service to be rec
|
||||||
Snapshot Generation
|
Snapshot Generation
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
Snapshots are generated at regular intervals by the current primary node and stored under the directory specified via the ``--snapshot-dir`` CLI option (defaults to ``snapshots/``). The transaction interval at which snapshots are generated is specified via the ``--snapshot-tx-interval`` CLI option (defaults to no snapshot).
|
Snapshots are generated at regular intervals by the current primary node and stored under the directory specified via the ``--snapshot-dir`` CLI option (defaults to ``snapshots/``). The transaction interval at which snapshots are generated is specified via the ``--snapshot-tx-interval`` CLI option (defaults to a new snapshot generated every ``10,000`` committed transactions).
|
||||||
|
|
||||||
.. TODO: Change defaults once https://github.com/microsoft/CCF/issues/1956 is complete
|
|
||||||
|
|
||||||
.. note:: Because the generation of a snapshot requires a new ledger chunk to be created (see :ref:`operations/ledger_snapshot:File Layout`), all nodes in the network must be started with the same ``--snapshot-tx-interval`` value.
|
.. note:: Because the generation of a snapshot requires a new ledger chunk to be created (see :ref:`operations/ledger_snapshot:File Layout`), all nodes in the network must be started with the same ``--snapshot-tx-interval`` value.
|
||||||
|
|
||||||
|
|
|
@ -1115,6 +1115,7 @@ namespace aft
|
||||||
{
|
{
|
||||||
LOG_FAIL_FMT("Follower failed to apply log entry: {}", i);
|
LOG_FAIL_FMT("Follower failed to apply log entry: {}", i);
|
||||||
state->last_idx--;
|
state->last_idx--;
|
||||||
|
ledger->truncate(state->last_idx);
|
||||||
send_append_entries_response(
|
send_append_entries_response(
|
||||||
r.from_node, AppendEntriesResponseType::FAIL);
|
r.from_node, AppendEntriesResponseType::FAIL);
|
||||||
break;
|
break;
|
||||||
|
@ -2025,8 +2026,9 @@ namespace aft
|
||||||
|
|
||||||
LOG_DEBUG_FMT("Compacting...");
|
LOG_DEBUG_FMT("Compacting...");
|
||||||
snapshotter->commit(idx);
|
snapshotter->commit(idx);
|
||||||
if (replica_state == Leader)
|
if (replica_state == Leader && consensus_type == ConsensusType::CFT)
|
||||||
{
|
{
|
||||||
|
// Snapshots are not yet supported with BFT
|
||||||
snapshotter->snapshot(idx);
|
snapshotter->snapshot(idx);
|
||||||
}
|
}
|
||||||
store->compact(idx);
|
store->compact(idx);
|
||||||
|
|
|
@ -160,13 +160,12 @@ int main(int argc, char** argv)
|
||||||
->capture_default_str()
|
->capture_default_str()
|
||||||
->transform(CLI::AsSizeValue(true)); // 1000 is kb
|
->transform(CLI::AsSizeValue(true)); // 1000 is kb
|
||||||
|
|
||||||
size_t snapshot_tx_interval = std::numeric_limits<std::size_t>::max();
|
size_t snapshot_tx_interval = 10'000;
|
||||||
app
|
app
|
||||||
.add_option(
|
.add_option(
|
||||||
"--snapshot-tx-interval",
|
"--snapshot-tx-interval",
|
||||||
snapshot_tx_interval,
|
snapshot_tx_interval,
|
||||||
"Number of transactions between snapshots (experimental). "
|
"Number of transactions between snapshots")
|
||||||
"Defaults to no snapshot.")
|
|
||||||
->capture_default_str();
|
->capture_default_str();
|
||||||
|
|
||||||
logger::Level host_log_level{logger::Level::INFO};
|
logger::Level host_log_level{logger::Level::INFO};
|
||||||
|
|
|
@ -169,7 +169,8 @@ namespace asynchost
|
||||||
get_snapshot_idx_from_file_name(file_name) == snapshot_idx)
|
get_snapshot_idx_from_file_name(file_name) == snapshot_idx)
|
||||||
{
|
{
|
||||||
LOG_INFO_FMT(
|
LOG_INFO_FMT(
|
||||||
"Committing snapshot file \"{}\" with evidence proof committed at "
|
"Committing snapshot file \"{}\" with evidence proof committed "
|
||||||
|
"at "
|
||||||
"{}",
|
"{}",
|
||||||
file_name,
|
file_name,
|
||||||
evidence_commit_idx);
|
evidence_commit_idx);
|
||||||
|
|
|
@ -53,16 +53,24 @@ def test_verify_quotes(network, args):
|
||||||
|
|
||||||
@reqs.description("Node with bad code fails to join")
|
@reqs.description("Node with bad code fails to join")
|
||||||
def test_add_node_with_bad_code(network, args):
|
def test_add_node_with_bad_code(network, args):
|
||||||
|
if args.enclave_type == "virtual":
|
||||||
|
LOG.warning("Skipping test_add_node_with_bad_code with virtual enclave")
|
||||||
|
return network
|
||||||
|
|
||||||
|
replacement_package = (
|
||||||
|
"liblogging" if args.package == "libjs_generic" else "libjs_generic"
|
||||||
|
)
|
||||||
|
|
||||||
new_code_id = get_code_id(
|
new_code_id = get_code_id(
|
||||||
args.oe_binary,
|
args.oe_binary,
|
||||||
infra.path.build_lib_path(args.replacement_package, args.enclave_type),
|
infra.path.build_lib_path(replacement_package, args.enclave_type),
|
||||||
)
|
)
|
||||||
|
|
||||||
LOG.info(f"Adding a node with unsupported code id {new_code_id}")
|
LOG.info(f"Adding a node with unsupported code id {new_code_id}")
|
||||||
code_not_found_exception = None
|
code_not_found_exception = None
|
||||||
try:
|
try:
|
||||||
network.create_and_add_pending_node(
|
network.create_and_add_pending_node(
|
||||||
args.replacement_package, "local://localhost", args, timeout=3
|
replacement_package, "local://localhost", args, timeout=3
|
||||||
)
|
)
|
||||||
except infra.network.CodeIdNotFound as err:
|
except infra.network.CodeIdNotFound as err:
|
||||||
code_not_found_exception = err
|
code_not_found_exception = err
|
||||||
|
@ -76,11 +84,15 @@ def test_add_node_with_bad_code(network, args):
|
||||||
|
|
||||||
@reqs.description("Update all nodes code")
|
@reqs.description("Update all nodes code")
|
||||||
def test_update_all_nodes(network, args):
|
def test_update_all_nodes(network, args):
|
||||||
|
replacement_package = (
|
||||||
|
"liblogging" if args.package == "libjs_generic" else "libjs_generic"
|
||||||
|
)
|
||||||
|
|
||||||
primary, _ = network.find_nodes()
|
primary, _ = network.find_nodes()
|
||||||
|
|
||||||
first_code_id, new_code_id = [
|
first_code_id, new_code_id = [
|
||||||
get_code_id(args.oe_binary, infra.path.build_lib_path(pkg, args.enclave_type))
|
get_code_id(args.oe_binary, infra.path.build_lib_path(pkg, args.enclave_type))
|
||||||
for pkg in [args.package, args.replacement_package]
|
for pkg in [args.package, replacement_package]
|
||||||
]
|
]
|
||||||
|
|
||||||
LOG.info("Add new code id")
|
LOG.info("Add new code id")
|
||||||
|
@ -115,7 +127,7 @@ def test_update_all_nodes(network, args):
|
||||||
LOG.info("Start fresh nodes running new code")
|
LOG.info("Start fresh nodes running new code")
|
||||||
for _ in range(0, len(network.nodes)):
|
for _ in range(0, len(network.nodes)):
|
||||||
new_node = network.create_and_trust_node(
|
new_node = network.create_and_trust_node(
|
||||||
args.replacement_package, "local://localhost", args
|
replacement_package, "local://localhost", args
|
||||||
)
|
)
|
||||||
assert new_node
|
assert new_node
|
||||||
|
|
||||||
|
@ -156,6 +168,5 @@ if __name__ == "__main__":
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
args.package = "liblogging"
|
args.package = "liblogging"
|
||||||
args.replacement_package = "libjs_generic"
|
|
||||||
args.nodes = infra.e2e_args.min_nodes(args, f=1)
|
args.nodes = infra.e2e_args.min_nodes(args, f=1)
|
||||||
run(args)
|
run(args)
|
||||||
|
|
|
@ -246,7 +246,8 @@ def cli_args(add=lambda x: None, parser=None, accept_unknown=False):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--snapshot-tx-interval",
|
"--snapshot-tx-interval",
|
||||||
help="Number of transactions between two snapshots",
|
help="Number of transactions between two snapshots",
|
||||||
default=None,
|
type=int,
|
||||||
|
default=10,
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--jwt-key-refresh-interval-s",
|
"--jwt-key-refresh-interval-s",
|
||||||
|
|
|
@ -179,7 +179,7 @@ class Network:
|
||||||
ledger_dir=None,
|
ledger_dir=None,
|
||||||
copy_ledger_read_only=False,
|
copy_ledger_read_only=False,
|
||||||
read_only_ledger_dir=None,
|
read_only_ledger_dir=None,
|
||||||
from_snapshot=False,
|
from_snapshot=True,
|
||||||
snapshot_dir=None,
|
snapshot_dir=None,
|
||||||
):
|
):
|
||||||
forwarded_args = {
|
forwarded_args = {
|
||||||
|
@ -198,23 +198,29 @@ class Network:
|
||||||
# specified
|
# specified
|
||||||
if from_snapshot and snapshot_dir is None:
|
if from_snapshot and snapshot_dir is None:
|
||||||
snapshot_dir = self.get_committed_snapshots(target_node)
|
snapshot_dir = self.get_committed_snapshots(target_node)
|
||||||
assert os.listdir(
|
|
||||||
snapshot_dir
|
|
||||||
), f"There are no snapshots to resume from in directory {snapshot_dir}"
|
|
||||||
|
|
||||||
committed_ledger_dir = None
|
committed_ledger_dir = None
|
||||||
current_ledger_dir = None
|
current_ledger_dir = None
|
||||||
if snapshot_dir is not None:
|
if from_snapshot:
|
||||||
LOG.info(f"Joining from snapshot directory: {snapshot_dir}")
|
if os.listdir(snapshot_dir):
|
||||||
# Only when joining from snapshot, retrieve ledger dirs from target node
|
LOG.info(f"Joining from snapshot directory: {snapshot_dir}")
|
||||||
# if the ledger directories are not specified. When joining without snapshot,
|
# Only when joining from snapshot, retrieve ledger dirs from target node
|
||||||
# the entire ledger will be retransmitted by primary node
|
# if the ledger directories are not specified. When joining without snapshot,
|
||||||
current_ledger_dir = ledger_dir or None
|
# the entire ledger will be retransmitted by primary node
|
||||||
committed_ledger_dir = read_only_ledger_dir or None
|
current_ledger_dir = ledger_dir or None
|
||||||
if copy_ledger_read_only and read_only_ledger_dir is None:
|
committed_ledger_dir = read_only_ledger_dir or None
|
||||||
current_ledger_dir, committed_ledger_dir = target_node.get_ledger(
|
if copy_ledger_read_only and read_only_ledger_dir is None:
|
||||||
include_read_only_dirs=True
|
current_ledger_dir, committed_ledger_dir = target_node.get_ledger(
|
||||||
|
include_read_only_dirs=True
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
LOG.warning(
|
||||||
|
f"Attempting to join from snapshot but {snapshot_dir} is empty: defaulting to complete replay of transaction history"
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
LOG.info(
|
||||||
|
"Joining without snapshot: complete transaction history will be replayed"
|
||||||
|
)
|
||||||
|
|
||||||
node.join(
|
node.join(
|
||||||
lib_name=lib_name,
|
lib_name=lib_name,
|
||||||
|
@ -293,12 +299,14 @@ class Network:
|
||||||
)
|
)
|
||||||
self._adjust_local_node_ids(node)
|
self._adjust_local_node_ids(node)
|
||||||
else:
|
else:
|
||||||
|
# When a new service is started, initial nodes join without a snapshot
|
||||||
self._add_node(
|
self._add_node(
|
||||||
node,
|
node,
|
||||||
args.package,
|
args.package,
|
||||||
args,
|
args,
|
||||||
recovery=recovery,
|
recovery=recovery,
|
||||||
ledger_dir=ledger_dir,
|
ledger_dir=ledger_dir,
|
||||||
|
from_snapshot=snapshot_dir is not None,
|
||||||
read_only_ledger_dir=read_only_ledger_dir,
|
read_only_ledger_dir=read_only_ledger_dir,
|
||||||
snapshot_dir=snapshot_dir,
|
snapshot_dir=snapshot_dir,
|
||||||
)
|
)
|
||||||
|
@ -494,9 +502,8 @@ class Network:
|
||||||
host,
|
host,
|
||||||
args,
|
args,
|
||||||
target_node=None,
|
target_node=None,
|
||||||
from_snapshot=False,
|
|
||||||
copy_ledger_read_only=False,
|
|
||||||
timeout=JOIN_TIMEOUT,
|
timeout=JOIN_TIMEOUT,
|
||||||
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new node and add it to the network. Note that the new node
|
Create a new node and add it to the network. Note that the new node
|
||||||
|
@ -509,8 +516,7 @@ class Network:
|
||||||
lib_name,
|
lib_name,
|
||||||
args,
|
args,
|
||||||
target_node,
|
target_node,
|
||||||
from_snapshot=from_snapshot,
|
**kwargs,
|
||||||
copy_ledger_read_only=copy_ledger_read_only,
|
|
||||||
)
|
)
|
||||||
primary, _ = self.find_primary()
|
primary, _ = self.find_primary()
|
||||||
try:
|
try:
|
||||||
|
@ -547,8 +553,7 @@ class Network:
|
||||||
host,
|
host,
|
||||||
args,
|
args,
|
||||||
target_node=None,
|
target_node=None,
|
||||||
from_snapshot=False,
|
**kwargs,
|
||||||
copy_ledger_read_only=False,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new node, add it to the network and let members vote to trust
|
Create a new node, add it to the network and let members vote to trust
|
||||||
|
@ -559,8 +564,7 @@ class Network:
|
||||||
host,
|
host,
|
||||||
args,
|
args,
|
||||||
target_node,
|
target_node,
|
||||||
from_snapshot,
|
**kwargs,
|
||||||
copy_ledger_read_only,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
primary, _ = self.find_primary()
|
primary, _ = self.find_primary()
|
||||||
|
@ -665,7 +669,7 @@ class Network:
|
||||||
assert "Primary unknown" in res.body.text(), res
|
assert "Primary unknown" in res.body.text(), res
|
||||||
except CCFConnectionException:
|
except CCFConnectionException:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
f"Could not successful connect to node {node.node_id}. Retrying..."
|
f"Could not successfully connect to node {node.node_id}. Retrying..."
|
||||||
)
|
)
|
||||||
if primary_id is not None:
|
if primary_id is not None:
|
||||||
break
|
break
|
||||||
|
|
|
@ -43,9 +43,14 @@ def check_can_progress(node, timeout=3):
|
||||||
assert False, f"Stuck at {r}"
|
assert False, f"Stuck at {r}"
|
||||||
|
|
||||||
|
|
||||||
@reqs.description("Adding a valid node from primary")
|
@reqs.description("Adding a valid node without snapshot")
|
||||||
def test_add_node(network, args):
|
def test_add_node(network, args):
|
||||||
new_node = network.create_and_trust_node(args.package, "local://localhost", args)
|
new_node = network.create_and_trust_node(
|
||||||
|
args.package,
|
||||||
|
"local://localhost",
|
||||||
|
args,
|
||||||
|
from_snapshot=False,
|
||||||
|
)
|
||||||
with new_node.client() as c:
|
with new_node.client() as c:
|
||||||
s = c.get("/node/state")
|
s = c.get("/node/state")
|
||||||
assert s.body.json()["id"] == new_node.node_id
|
assert s.body.json()["id"] == new_node.node_id
|
||||||
|
@ -56,14 +61,25 @@ def test_add_node(network, args):
|
||||||
@reqs.description("Adding a valid node from a backup")
|
@reqs.description("Adding a valid node from a backup")
|
||||||
@reqs.at_least_n_nodes(2)
|
@reqs.at_least_n_nodes(2)
|
||||||
def test_add_node_from_backup(network, args):
|
def test_add_node_from_backup(network, args):
|
||||||
backup = network.find_any_backup()
|
primary, backup = network.find_primary_and_any_backup()
|
||||||
|
|
||||||
|
# Retrieve snapshot from primary as only primary node
|
||||||
|
# generates snapshots
|
||||||
|
snapshot_dir = network.get_committed_snapshots(primary)
|
||||||
|
|
||||||
new_node = network.create_and_trust_node(
|
new_node = network.create_and_trust_node(
|
||||||
args.package, "local://localhost", args, target_node=backup
|
args.package,
|
||||||
|
"local://localhost",
|
||||||
|
args,
|
||||||
|
target_node=backup,
|
||||||
|
snapshot_dir=snapshot_dir,
|
||||||
)
|
)
|
||||||
assert new_node
|
assert new_node
|
||||||
return network
|
return network
|
||||||
|
|
||||||
|
|
||||||
|
# Note: this test cannot be included in the full test suite yet as
|
||||||
|
# add_from_snapshot() decorator makes use of historical queries (#1648)
|
||||||
@reqs.description("Adding a valid node from snapshot")
|
@reqs.description("Adding a valid node from snapshot")
|
||||||
@reqs.at_least_n_nodes(2)
|
@reqs.at_least_n_nodes(2)
|
||||||
@reqs.add_from_snapshot()
|
@reqs.add_from_snapshot()
|
||||||
|
@ -72,7 +88,6 @@ def test_add_node_from_snapshot(network, args, copy_ledger_read_only=True):
|
||||||
args.package,
|
args.package,
|
||||||
"local://localhost",
|
"local://localhost",
|
||||||
args,
|
args,
|
||||||
from_snapshot=True,
|
|
||||||
copy_ledger_read_only=copy_ledger_read_only,
|
copy_ledger_read_only=copy_ledger_read_only,
|
||||||
)
|
)
|
||||||
assert new_node
|
assert new_node
|
||||||
|
@ -89,35 +104,15 @@ def test_add_as_many_pending_nodes(network, args):
|
||||||
)
|
)
|
||||||
|
|
||||||
for _ in range(number_new_nodes):
|
for _ in range(number_new_nodes):
|
||||||
network.create_and_add_pending_node(args.package, "local://localhost", args)
|
network.create_and_add_pending_node(
|
||||||
|
args.package,
|
||||||
|
"local://localhost",
|
||||||
|
args,
|
||||||
|
)
|
||||||
check_can_progress(network.find_primary()[0])
|
check_can_progress(network.find_primary()[0])
|
||||||
return network
|
return network
|
||||||
|
|
||||||
|
|
||||||
@reqs.description("Add node with untrusted code version")
|
|
||||||
def test_add_node_untrusted_code(network, args):
|
|
||||||
if args.enclave_type != "virtual":
|
|
||||||
LOG.info("Adding an invalid node (unknown code id)")
|
|
||||||
code_not_found_exception = None
|
|
||||||
try:
|
|
||||||
lib_name = (
|
|
||||||
"liblogging" if args.package == "libjs_generic" else "libjs_generic"
|
|
||||||
)
|
|
||||||
network.create_and_add_pending_node(
|
|
||||||
lib_name, "local://localhost", args, timeout=3
|
|
||||||
)
|
|
||||||
except infra.network.CodeIdNotFound as err:
|
|
||||||
code_not_found_exception = err
|
|
||||||
|
|
||||||
assert (
|
|
||||||
code_not_found_exception is not None
|
|
||||||
), "Adding node with unknown code id should fail"
|
|
||||||
|
|
||||||
else:
|
|
||||||
LOG.warning("Skipping unknown code id test with virtual enclave")
|
|
||||||
return network
|
|
||||||
|
|
||||||
|
|
||||||
@reqs.description("Retiring a backup")
|
@reqs.description("Retiring a backup")
|
||||||
@reqs.at_least_n_nodes(2)
|
@reqs.at_least_n_nodes(2)
|
||||||
@reqs.can_kill_n_nodes(1)
|
@reqs.can_kill_n_nodes(1)
|
||||||
|
@ -161,23 +156,21 @@ def run(args):
|
||||||
|
|
||||||
test_add_node_from_backup(network, args)
|
test_add_node_from_backup(network, args)
|
||||||
test_add_node(network, args)
|
test_add_node(network, args)
|
||||||
test_add_node_untrusted_code(network, args)
|
|
||||||
test_retire_backup(network, args)
|
test_retire_backup(network, args)
|
||||||
test_add_as_many_pending_nodes(network, args)
|
test_add_as_many_pending_nodes(network, args)
|
||||||
test_add_node(network, args)
|
test_add_node(network, args)
|
||||||
test_retire_primary(network, args)
|
test_retire_primary(network, args)
|
||||||
|
|
||||||
if args.snapshot_tx_interval is not None:
|
test_add_node_from_snapshot(network, args)
|
||||||
test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)
|
test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)
|
||||||
test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)
|
errors, _ = network.get_joined_nodes()[-1].stop()
|
||||||
errors, _ = network.get_joined_nodes()[-1].stop()
|
if not any(
|
||||||
if not any(
|
"No snapshot found: Node will request all historical transactions" in s
|
||||||
"No snapshot found: Node will request all historical transactions" in s
|
for s in errors
|
||||||
for s in errors
|
):
|
||||||
):
|
raise ValueError(
|
||||||
raise ValueError(
|
"New node shouldn't join from snapshot if snapshot cannot be verified"
|
||||||
"New node shouldn't join from snapshot if snapshot cannot be verified"
|
)
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -111,13 +111,14 @@ def run(args):
|
||||||
network.start_and_join(args)
|
network.start_and_join(args)
|
||||||
|
|
||||||
for i in range(args.recovery):
|
for i in range(args.recovery):
|
||||||
# Alternate between recovery with primary change and stable primary-ship
|
# Alternate between recovery with primary change and stable primary-ship,
|
||||||
|
# with and without snapshots
|
||||||
if i % 2 == 0:
|
if i % 2 == 0:
|
||||||
recovered_network = test_share_resilience(
|
recovered_network = test_share_resilience(
|
||||||
network, args, args.use_snapshot
|
network, args, from_snapshot=True
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
recovered_network = test(network, args, args.use_snapshot)
|
recovered_network = test(network, args, from_snapshot=False)
|
||||||
network.stop_all_nodes()
|
network.stop_all_nodes()
|
||||||
network = recovered_network
|
network = recovered_network
|
||||||
LOG.success("Recovery complete on all nodes")
|
LOG.success("Recovery complete on all nodes")
|
||||||
|
@ -142,12 +143,6 @@ checked. Note that the key for each logging message is unique (per table).
|
||||||
type=int,
|
type=int,
|
||||||
default=5,
|
default=5,
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--use-snapshot",
|
|
||||||
help="Use latest snapshot for faster recovery procedure",
|
|
||||||
action="store_true",
|
|
||||||
default=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
args = infra.e2e_args.cli_args(add)
|
args = infra.e2e_args.cli_args(add)
|
||||||
args.package = "liblogging"
|
args.package = "liblogging"
|
||||||
|
|
|
@ -82,5 +82,6 @@ exec python "${START_NETWORK_SCRIPT}" \
|
||||||
--initial-user-count 1 \
|
--initial-user-count 1 \
|
||||||
--gov-script "${GOV_SCRIPT}" \
|
--gov-script "${GOV_SCRIPT}" \
|
||||||
--ledger-chunk-bytes 5MB \
|
--ledger-chunk-bytes 5MB \
|
||||||
|
--snapshot-tx-interval 10000 \
|
||||||
--label sandbox \
|
--label sandbox \
|
||||||
"${extra_args[@]}"
|
"${extra_args[@]}"
|
||||||
|
|
|
@ -43,7 +43,16 @@ suites["membership_recovery"] = suite_membership_recovery
|
||||||
|
|
||||||
# This suite tests that nodes addition, deletion and primary changes
|
# This suite tests that nodes addition, deletion and primary changes
|
||||||
# can be interleaved
|
# can be interleaved
|
||||||
|
# Note: snapshot tests are not yet integrated in the main test suite
|
||||||
|
# as they test historical queries which do not yet work across rekey/recovery
|
||||||
|
# https://github.com/microsoft/CCF/issues/1648
|
||||||
suite_reconfiguration = [
|
suite_reconfiguration = [
|
||||||
|
reconfiguration.test_add_node_from_snapshot,
|
||||||
|
reconfiguration.test_add_node_from_snapshot,
|
||||||
|
election.test_kill_primary,
|
||||||
|
reconfiguration.test_add_node_from_snapshot,
|
||||||
|
reconfiguration.test_retire_primary,
|
||||||
|
e2e_logging.test_view_history,
|
||||||
reconfiguration.test_add_node,
|
reconfiguration.test_add_node,
|
||||||
reconfiguration.test_retire_primary,
|
reconfiguration.test_retire_primary,
|
||||||
reconfiguration.test_add_node,
|
reconfiguration.test_add_node,
|
||||||
|
@ -51,21 +60,9 @@ suite_reconfiguration = [
|
||||||
reconfiguration.test_add_node,
|
reconfiguration.test_add_node,
|
||||||
reconfiguration.test_add_node,
|
reconfiguration.test_add_node,
|
||||||
reconfiguration.test_retire_backup,
|
reconfiguration.test_retire_backup,
|
||||||
reconfiguration.test_add_node,
|
|
||||||
election.test_kill_primary,
|
|
||||||
]
|
]
|
||||||
suites["reconfiguration"] = suite_reconfiguration
|
suites["reconfiguration"] = suite_reconfiguration
|
||||||
|
|
||||||
# Temporary suite while snapshotting feature is being implemented
|
|
||||||
# https://github.com/microsoft/CCF/milestone/12
|
|
||||||
suite_snapshots = [
|
|
||||||
reconfiguration.test_add_node_from_snapshot,
|
|
||||||
election.test_kill_primary,
|
|
||||||
reconfiguration.test_add_node_from_snapshot,
|
|
||||||
e2e_logging.test_view_history,
|
|
||||||
]
|
|
||||||
suites["snapshots"] = suite_snapshots
|
|
||||||
|
|
||||||
all_tests_suite = [
|
all_tests_suite = [
|
||||||
# e2e_logging:
|
# e2e_logging:
|
||||||
e2e_logging.test,
|
e2e_logging.test,
|
||||||
|
@ -93,7 +90,6 @@ all_tests_suite = [
|
||||||
reconfiguration.test_add_node,
|
reconfiguration.test_add_node,
|
||||||
reconfiguration.test_add_node_from_backup,
|
reconfiguration.test_add_node_from_backup,
|
||||||
reconfiguration.test_add_as_many_pending_nodes,
|
reconfiguration.test_add_as_many_pending_nodes,
|
||||||
reconfiguration.test_add_node_untrusted_code,
|
|
||||||
reconfiguration.test_retire_backup,
|
reconfiguration.test_retire_backup,
|
||||||
# recovery:
|
# recovery:
|
||||||
recovery.test,
|
recovery.test,
|
||||||
|
@ -104,6 +100,7 @@ all_tests_suite = [
|
||||||
election.test_kill_primary,
|
election.test_kill_primary,
|
||||||
# code update:
|
# code update:
|
||||||
code_update.test_verify_quotes,
|
code_update.test_verify_quotes,
|
||||||
|
code_update.test_add_node_with_bad_code,
|
||||||
]
|
]
|
||||||
suites["all"] = all_tests_suite
|
suites["all"] = all_tests_suite
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче