Better reporting on failure to start, tighten reconfiguration and code update tests (#660)

This commit is contained in:
Amaury Chamayou 2019-12-20 10:44:50 +00:00 коммит произвёл GitHub
Родитель 363eb2f7bf
Коммит 393febe023
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 30 добавлений и 16 удалений

Просмотреть файл

@@ -41,10 +41,13 @@ def run(args):
new_code_id = get_code_id(f"{args.patched_file_name}.so.signed")
LOG.info(f"Adding a node with unsupported code id {new_code_id}")
assert (
try:
network.create_and_trust_node(args.patched_file_name, "localhost", args)
== None
), "Adding node with unsupported code id should fail"
assert (
False
), f"Adding a node with unsupported code id {new_code_id} should fail"
except TimeoutError as err:
assert "CODE_ID_NOT_FOUND" in err.message, err.message
network.consortium.add_new_code(1, primary, new_code_id)

Просмотреть файл

@@ -244,13 +244,17 @@ class Network:
else infra.node.NodeStatus.TRUSTED
),
)
except TimeoutError:
except TimeoutError as err:
# The node can be safely discarded since it has not been
# attributed a unique node_id by CCF
LOG.error(f"New pending node {new_node.node_id} failed to join the network")
new_node.stop()
errors = new_node.stop()
if errors:
for error in errors:
if "An error occurred while joining the network" in error:
err.message = f"TimeoutError: {error.split('|', 1)[1]}"
self.nodes.remove(new_node)
return None
raise
return new_node
@@ -260,8 +264,6 @@ class Network:
it so that it becomes part of the consensus protocol.
"""
new_node = self.create_and_add_pending_node(lib_name, host, args, target_node)
if new_node is None:
return None
primary, _ = self.find_primary()
try:
@@ -272,7 +274,7 @@ class Network:
except (ValueError, TimeoutError):
LOG.error(f"New trusted node {new_node.node_id} failed to join the network")
new_node.stop()
return None
raise
new_node.network_state = infra.node.NodeNetworkState.joined
if args.consensus != "pbft":

Просмотреть файл

@@ -159,8 +159,9 @@ class Node:
def stop(self):
if self.remote:
self.remote.stop()
errors = self.remote.stop()
self.network_state = NodeNetworkState.stopped
return errors
def is_stopped(self):
return self.network_state == NodeNetworkState.stopped

Просмотреть файл

@@ -59,6 +59,7 @@ def sftp_session(hostname):
def log_errors(out_path, err_path):
error_filter = ["[fail ]", "[fatal]"]
error_lines = []
try:
errors = 0
tail_lines = deque(maxlen=10)
@@ -68,6 +69,7 @@ def log_errors(out_path, err_path):
tail_lines.append(stripped_line)
if any(x in stripped_line for x in error_filter):
LOG.error("{}: {}".format(out_path, stripped_line))
error_lines.append(stripped_line)
errors += 1
if errors:
LOG.info("{} errors found, printing end of output for context:", errors)
@@ -81,6 +83,7 @@ def log_errors(out_path, err_path):
LOG.exception("Could not read err output {}".format(err_path))
except IOError:
LOG.exception("Could not check output {} for errors".format(out_path))
return error_lines
class CmdMixin(object):
@@ -259,12 +262,13 @@ class SSHRemote(CmdMixin):
"""
LOG.info("[{}] closing".format(self.hostname))
self.get_logs()
log_errors(
errors = log_errors(
"{}_out_{}".format(self.hostname, self.name),
"{}_err_{}".format(self.hostname, self.name),
)
self.client.close()
self.proc_client.close()
return errors
def setup(self):
"""
@@ -434,7 +438,7 @@ class LocalRemote(CmdMixin):
self.stdout.close()
if self.stderr:
self.stderr.close()
log_errors(self.out, self.err)
return log_errors(self.out, self.err)
def setup(self):
"""
@@ -664,8 +668,9 @@ class CCFRemote(object):
return self.remote._dbg()
def stop(self):
errors = []
try:
self.remote.stop()
errors = self.remote.stop()
except Exception:
LOG.exception("Failed to shut down {} cleanly".format(self.local_node_id))
if self.profraw:
@@ -673,6 +678,7 @@ class CCFRemote(object):
self.remote.get(self.profraw)
except Exception:
LOG.info(f"Could not retrieve {self.profraw}")
return errors
def wait_for_stdout_line(self, line, timeout=5):
return self.remote.wait_for_stdout_line(line, timeout)

Просмотреть файл

@@ -58,9 +58,11 @@ def test_add_as_many_pending_nodes(network, args):
def test_add_node_untrusted_code(network, args):
if args.enclave_type == "debug":
LOG.info("Adding an invalid node (unknown code id)")
assert (
network.create_and_trust_node("libluagenericenc", "localhost", args) == None
), "Adding node with unknown code id should fail"
try:
network.create_and_trust_node("libluagenericenc", "localhost", args)
assert False, "Adding node with unknown code id should fail"
except TimeoutError as err:
assert "CODE_ID_NOT_FOUND" in err.message, err.message
else:
LOG.warning("Skipping unknown code id test with virtual enclave")
return network