CCF/tests/infra/remote.py

809 строки
26 KiB
Python
Исходник Обычный вид История

2019-04-26 18:27:27 +03:00
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the Apache 2.0 License.
import os
import time
from enum import Enum
import paramiko
import subprocess
from contextlib import contextmanager
import infra.path
2019-05-29 12:42:17 +03:00
import uuid
import ctypes
import signal
2019-08-08 18:11:46 +03:00
import re
2020-06-15 17:58:45 +03:00
import stat
import shutil
from collections import deque
2019-04-26 18:27:27 +03:00
from loguru import logger as LOG
# Debugger front-end used to build the command returned by debug_node_cmd().
# Taken from the DBG environment variable, falling back to "cgdb" when unset.
DBG = os.getenv("DBG", "cgdb")
# Default time allowed for a file to show up when fetching from / listing a
# remote. It is added to time.time(), so the unit is seconds.
FILE_TIMEOUT = 60
2019-04-26 18:27:27 +03:00
# Handle on the C library, used to call prctl() through ctypes.
_libc = ctypes.CDLL("libc.so.6")
def _term_on_pdeathsig():
    """
    Ask the kernel to send SIGTERM to the calling process when its parent dies.

    Meant to run in a freshly forked child (it is installed as the
    ``preexec_fn`` of the processes spawned by popen()).
    """
    # Value of PR_SET_PDEATHSIG, see usr/include/linux/prctl.h
    pr_set_pdeathsig = 1
    _libc.prctl(pr_set_pdeathsig, signal.SIGTERM)
def popen(*args, **kwargs):
    """
    Spawn a process exactly like subprocess.Popen, except that the child is
    set up to receive SIGTERM when this (parent) process dies.

    Any ``preexec_fn`` supplied by the caller is replaced.

    :return: the subprocess.Popen object of the spawned process
    """
    popen_kwargs = dict(kwargs, preexec_fn=_term_on_pdeathsig)
    return subprocess.Popen(*args, **popen_kwargs)
2019-04-26 18:27:27 +03:00
2020-04-17 14:53:01 +03:00
def coverage_enabled(binary):
    """
    Tell whether `binary` carries LLVM coverage instrumentation.

    The symbol listing printed by ``nm -C`` is searched for the
    ``__llvm_coverage_mapping`` marker.

    :param binary: path of the executable or shared object to inspect
    :return: True if the marker appears in nm's output, False otherwise
        (including when nm cannot be run, or fails on the file)
    """
    # nm is invoked with an argument list rather than through a shell
    # pipeline: the path is passed verbatim, so spaces or shell
    # metacharacters in it can neither break the command nor inject another
    # one, and nothing is echoed to our own stdout.
    try:
        listing = subprocess.run(
            ["nm", "-C", binary],
            stdout=subprocess.PIPE,
            check=False,
        )
    except OSError:
        # nm is not installed or could not be executed
        return False
    # nm's own exit status is deliberately ignored, as it was in the
    # original pipeline where only grep's status counted.
    return b"__llvm_coverage_mapping" in listing.stdout
2019-04-26 18:27:27 +03:00
@contextmanager
def sftp_session(hostname):
    """
    Context manager yielding an SFTP session on a dedicated SSH connection
    to `hostname`.

    Unknown host keys are accepted automatically (AutoAddPolicy). Both the
    SFTP session and the SSH client are closed on exit, whether or not the
    body raised.

    :param hostname: host to connect to
    """
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # connect() is inside the try block so that whatever a failed
        # connection attempt left open (e.g. a transport whose
        # authentication was rejected) is still closed below.
        client.connect(hostname)
        session = client.open_sftp()
        try:
            yield session
        finally:
            session.close()
    finally:
        client.close()
def log_errors(out_path, err_path):
    """
    Scan a node's captured output for errors and log what is found.

    Every line of `out_path` containing "[fail ]" or "[fatal]" is logged as
    an error; if there is at least one, the last 10 lines of the file are
    logged too, for context. The whole content of `err_path`, if not empty,
    is logged as an error. A file that cannot be read is reported through the
    logger and contributes an empty list.

    :param out_path: path of the captured standard output
    :param err_path: path of the captured standard error
    :return: tuple (matching lines of out_path with trailing whitespace
        stripped, raw lines of err_path)
    """
    markers = ("[fail ]", "[fatal]")
    error_lines = []
    try:
        # Sliding window over the most recent lines of the output
        recent = deque(maxlen=10)
        with open(out_path, "r", errors="replace") as out_file:
            for raw in out_file:
                entry = raw.rstrip()
                recent.append(entry)
                if not any(marker in entry for marker in markers):
                    continue
                LOG.error(f"{out_path}: {entry}")
                error_lines.append(entry)
        if error_lines:
            LOG.info(
                "{} errors found, printing end of output for context:", len(error_lines)
            )
            for entry in recent:
                LOG.info(entry)
    except IOError:
        LOG.exception(f"Could not check output {out_path} for errors")

    fatal_error_lines = []
    try:
        with open(err_path, "r", errors="replace") as err_file:
            fatal_error_lines = err_file.readlines()
        if fatal_error_lines:
            LOG.error(f"Contents of {err_path}:\n{''.join(fatal_error_lines)}")
    except IOError:
        LOG.exception(f"Could not read err output {err_path}")

    return error_lines, fatal_error_lines
2019-04-26 18:27:27 +03:00
class CmdMixin(object):
    """
    Helpers for classes that keep their command line as a list in `self.cmd`.
    """

    def set_perf(self):
        """
        Prefix `self.cmd` so that the command is run under `perf record`.
        """
        perf_prefix = ["perf", "record", "--freq=1000", "--call-graph=dwarf", "-s"]
        self.cmd = perf_prefix + self.cmd

    def _get_perf(self, lines):
        """
        Extract a throughput figure from captured output.

        :param lines: iterable of bytes lines; each one is decoded and logged
            at debug level as it is examined
        :return: the number captured by the first line matching
            "=> (.*)tx/s", as a float
        :raises ValueError: if no line matches
        """
        pattern = "=> (.*)tx/s"
        for raw in lines:
            text = raw.decode()
            LOG.debug(text)
            found = re.search(pattern, text)
            if found is not None:
                return float(found.group(1))
        raise ValueError(f"No performance result found (pattern is {pattern})")
2019-08-08 18:11:46 +03:00
2019-04-26 18:27:27 +03:00
class SSHRemote(CmdMixin):
    def __init__(
        self,
        name,
        hostname,
        exe_files,
        data_files,
        cmd,
        workspace,
        label,
        common_dir,
        env=None,
        log_format_json=None,
    ):
        """
        Runs a command on a remote host, through an SSH connection. A temporary
        directory is created, and some files can be shipped over. The command is
        run out of that directory.

        Note that the name matters, since the temporary directory that will be first
        deleted, then created and populated is workspace/label_name. There is deliberately no
        cleanup on shutdown, to make debugging/inspection possible.

        setup() connects, creates the directory and ships over the files
        start() runs the specified command
        stop() disconnects, which shuts down the command via SIGHUP

        :param name: name of the remote, used in the directory and log file names
        :param hostname: host to connect to
        :param exe_files: local paths of executables to copy (mode preserved)
        :param data_files: local paths of files/directories to copy
        :param cmd: command line to run, as a list of strings
        :param workspace: directory on the remote host under which the
            working directory is created
        :param label: prefix of the working directory name
        :param common_dir: local directory into which logs are downloaded
        :param env: optional dict of environment variables for the command
        :param log_format_json: accepted for interface parity, not used here
        """
        self.hostname = hostname
        self.exe_files = exe_files
        self.data_files = data_files
        self.cmd = cmd
        self.client = paramiko.SSHClient()
        # this client (proc_client) is used to execute commands on the remote host since the main client uses pty
        self.proc_client = paramiko.SSHClient()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.proc_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.common_dir = common_dir
        self.root = os.path.join(workspace, label + "_" + name)
        self.name = name
        self.env = env or {}
        self.out = os.path.join(self.root, "out")
        self.err = os.path.join(self.root, "err")
        self.suspension_proc = None
        # File (relative to root) from which pid() reads the process id;
        # presumably written by the command itself - TODO confirm
        self.pid_file = f"{os.path.basename(self.cmd[0])}.pid"
        self._pid = None

    def _rc(self, cmd):
        """
        Run `cmd` on the remote host through the main client and return its
        exit status.
        """
        LOG.info("[{}] {}".format(self.hostname, cmd))
        _, stdout, _ = self.client.exec_command(cmd)
        return stdout.channel.recv_exit_status()

    def _run_checked(self, cmd):
        """
        Run `cmd` on the remote host and assert that it exited with status 0.

        The command is executed outside of the assert statement, so that it
        still runs when assertions are disabled (python -O).
        """
        rc = self._rc(cmd)
        assert rc == 0, f"[{self.hostname}] '{cmd}' exited with status {rc}"

    def _connect(self):
        """
        Connect both SSH clients to the remote host.
        """
        LOG.debug("[{}] connect".format(self.hostname))
        self.client.connect(self.hostname)
        self.proc_client.connect(self.hostname)

    def _setup_files(self):
        """
        Recreate the working directory on the remote host and copy the
        executable and data files into it.
        """
        self._run_checked("rm -rf {}".format(self.root))
        self._run_checked("mkdir -p {}".format(self.root))
        # For SSHRemote, both executable files (host and enclave) and data
        # files (ledger, secrets) are copied to the remote
        session = self.client.open_sftp()
        try:
            for path in self.exe_files:
                tgt_path = os.path.join(self.root, os.path.basename(path))
                LOG.info("[{}] copy {} from {}".format(self.hostname, tgt_path, path))
                session.put(path, tgt_path)
                # Carry the local mode bits over to the remote copy. The local
                # is deliberately not called `stat`, which is a module name.
                local_mode = os.stat(path).st_mode
                session.chmod(tgt_path, local_mode)
            for path in self.data_files:
                tgt_path = os.path.join(self.root, os.path.basename(path))
                if os.path.isdir(path):
                    # Only the direct children of the directory are copied
                    session.mkdir(tgt_path)
                    for f in os.listdir(path):
                        session.put(os.path.join(path, f), os.path.join(tgt_path, f))
                else:
                    session.put(path, tgt_path)
                LOG.info("[{}] copy {} from {}".format(self.hostname, tgt_path, path))
        finally:
            # Close the SFTP session even if a transfer failed
            session.close()

    def get(
        self,
        file_name,
        dst_path,
        timeout=FILE_TIMEOUT,
        target_name=None,
        pre_condition_func=lambda src_dir, _: True,
    ):
        """
        Get file called `file_name` under the root of the remote. If the
        file is missing, wait for timeout, and raise an exception.

        If the file is present, it is copied into `dst_path` on the caller's
        host, as `target_name` if it is set. If it is a directory, its direct
        children are copied into `dst_path`/`file_name` (which is emptied
        first, and `target_name` is ignored), provided that
        `pre_condition_func(remote_dir, listdir)` returns a true value.

        This call spins up a separate client because we don't want to interrupt
        the main cmd that may be running.

        :raises ValueError: if the file did not appear within `timeout`
        :raises RuntimeError: if the pre-condition on a directory is not met
        """
        with sftp_session(self.hostname) as session:
            end_time = time.time() + timeout
            start_time = time.time()
            while time.time() < end_time:
                try:
                    target_name = target_name or file_name
                    fileattr = session.lstat(os.path.join(self.root, file_name))
                    if stat.S_ISDIR(fileattr.st_mode):
                        src_dir = os.path.join(self.root, file_name)
                        dst_dir = os.path.join(dst_path, file_name)
                        if os.path.exists(dst_dir):
                            shutil.rmtree(dst_dir)
                        os.makedirs(dst_dir)
                        if not pre_condition_func(src_dir, session.listdir):
                            raise RuntimeError(
                                "Pre-condition for getting remote files failed"
                            )
                        for f in session.listdir(src_dir):
                            session.get(
                                os.path.join(src_dir, f),
                                os.path.join(dst_dir, f),
                            )
                    else:
                        session.get(
                            os.path.join(self.root, file_name),
                            os.path.join(dst_path, target_name),
                        )
                    LOG.debug(
                        "[{}] found {} after {}s".format(
                            self.hostname, file_name, int(time.time() - start_time)
                        )
                    )
                    break
                except FileNotFoundError:
                    # Not there (yet): retry until the deadline
                    time.sleep(0.1)
            else:
                raise ValueError(file_name)

    def list_files(self, timeout=FILE_TIMEOUT):
        """
        List the content of the remote working directory, retrying on any
        error until `timeout` has elapsed.

        :raises ValueError: if the listing did not succeed within `timeout`
        """
        files = []
        with sftp_session(self.hostname) as session:
            end_time = time.time() + timeout
            while time.time() < end_time:
                try:
                    files = session.listdir(self.root)
                    break
                except Exception:
                    time.sleep(0.1)
            else:
                raise ValueError(self.root)
        return files

    def get_logs(self):
        """
        Download the remote err and out files to common_dir, as
        hostname_name_err and hostname_name_out. A missing file is only
        reported as a warning.
        """
        with sftp_session(self.hostname) as session:
            for filepath in (self.err, self.out):
                try:
                    local_file_name = "{}_{}_{}".format(
                        self.hostname,
                        self.name,
                        os.path.basename(filepath),
                    )
                    dst_path = os.path.join(self.common_dir, local_file_name)
                    session.get(filepath, dst_path)
                    LOG.info("Downloaded {}".format(dst_path))
                except FileNotFoundError:
                    LOG.warning(
                        "Failed to download {} to {} (host: {})".format(
                            filepath, dst_path, self.hostname
                        )
                    )

    def start(self):
        """
        Start cmd on the remote host. stdout and err are redirected to the out
        and err files in the remote working directory.
        We create a pty on the remote host under which to run the command, so as to
        get a SIGHUP on disconnection.
        """
        cmd = self.get_cmd()
        LOG.info("[{}] {}".format(self.hostname, cmd))
        self.client.exec_command(cmd, get_pty=True)
        # Wait for the process id to be available, and cache it
        self.pid()

    def pid(self):
        """
        Return the process id read from the pid file in the remote working
        directory. The file is polled every 0.1s, up to about 30 times, and
        the value is cached once read.

        :raises TimeoutError: if the pid file stayed empty or missing
        """
        if self._pid is None:
            pid_path = os.path.join(self.root, self.pid_file)
            time_left = 3
            while time_left > 0:
                _, stdout, _ = self.proc_client.exec_command(f'cat "{pid_path}"')
                res = stdout.read().strip()
                if res:
                    self._pid = int(res)
                    break
                time_left = max(time_left - 0.1, 0)
                if not time_left:
                    raise TimeoutError("Failed to read PID from file")
                time.sleep(0.1)
        return self._pid

    def suspend(self):
        """
        Suspend the remote process by sending it SIGSTOP.

        :raises RuntimeError: if the kill command failed
        """
        _, stdout, _ = self.proc_client.exec_command(f"kill -STOP {self.pid()}")
        if stdout.channel.recv_exit_status() != 0:
            raise RuntimeError(f"Remote {self.name} could not be suspended")

    def resume(self):
        """
        Resume the remote process by sending it SIGCONT.

        :raises RuntimeError: if the kill command failed
        """
        _, stdout, _ = self.proc_client.exec_command(f"kill -CONT {self.pid()}")
        if stdout.channel.recv_exit_status() != 0:
            raise RuntimeError(f"Could not resume remote {self.name} from suspension!")

    def stop(self):
        """
        Download and scan the logs, then disconnect the client, and therefore
        shut down the command as well.

        :return: tuple (error lines found in out, lines of err), as returned
            by log_errors()
        """
        LOG.info("[{}] closing".format(self.hostname))
        try:
            self.get_logs()
            errors, fatal_errors = log_errors(
                os.path.join(
                    self.common_dir, "{}_{}_out".format(self.hostname, self.name)
                ),
                os.path.join(
                    self.common_dir, "{}_{}_err".format(self.hostname, self.name)
                ),
            )
        finally:
            # Always disconnect, even when the logs could not be retrieved:
            # this is what shuts the remote command down.
            self.client.close()
            self.proc_client.close()
        return errors, fatal_errors

    def setup(self):
        """
        Connect to the remote host, empty the temporary directory if it exists,
        and populate it with the initial set of files.
        """
        self._connect()
        self._setup_files()

    def get_cmd(self):
        """
        Build the shell command line run on the remote host: change to the
        working directory, then run cmd with the env variables set, stdout and
        stderr redirected to the out and err files and stdin to /dev/null.
        """
        env = " ".join(f"{key}={value}" for key, value in self.env.items())
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {env} {cmd} 1> {self.out} 2> {self.err} 0< /dev/null"

    def debug_node_cmd(self):
        """
        Build the shell command line to run cmd under the debugger (DBG) from
        the working directory.
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {DBG} --args {cmd}"

    def _connect_new(self):
        """
        Open and return a new SSH connection to the remote host. The caller is
        responsible for closing it.
        """
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            client.connect(self.hostname)
        except Exception:
            # Do not leave a half-open connection behind
            client.close()
            raise
        return client

    def check_done(self):
        """
        Tell whether the remote process has exited, i.e. whether
        `ps -p <pid>` exits with status 1.
        """
        client = self._connect_new()
        try:
            _, stdout, _ = client.exec_command(f"ps -p {self.pid()}")
            return stdout.channel.recv_exit_status() == 1
        finally:
            client.close()

    def get_result(self, line_count):
        """
        Extract the performance result from the last `line_count` lines of the
        remote out file.

        :return: the result as a float, or None if the out file could not be
            read with tail
        :raises ValueError: if no performance result is found in the lines
        """
        client = self._connect_new()
        try:
            _, stdout, _ = client.exec_command(f"tail -{line_count} {self.out}")
            if stdout.channel.recv_exit_status() == 0:
                lines = stdout.read().splitlines()
                result = lines[-line_count:]
                return self._get_perf(result)
        finally:
            client.close()
2019-04-26 18:27:27 +03:00
@contextmanager
def ssh_remote(*args, **kwargs):
    """
    Context manager around SSHRemote: the remote is built from the given
    arguments, set up and started on entry, and stopped on exit (also when
    setup or start failed).
    """
    node = SSHRemote(*args, **kwargs)
    try:
        node.setup()
        node.start()
        yield node
    finally:
        node.stop()
class LocalRemote(CmdMixin):
    def __init__(
        self,
        name,
        hostname,
        exe_files,
        data_files,
        cmd,
        workspace,
        label,
        common_dir,
        env=None,
        log_format_json=None,
    ):
        """
        Local Equivalent to the SSHRemote

        The command is run as a child process out of workspace/label_name,
        with stdout and stderr captured in the out and err files of that
        directory.

        :param name: name of the remote, used in the directory name
        :param hostname: only used to prefix log messages
        :param exe_files: paths of executables, symlinked into the directory
        :param data_files: paths of files/directories copied into the directory
        :param cmd: command line to run, as a list of strings
        :param workspace: directory under which the working directory is created
        :param label: prefix of the working directory name
        :param common_dir: stored, not otherwise used by this class
        :param env: passed as-is as the `env` of the child process
        :param log_format_json: accepted for interface parity, not used here
        """
        self.hostname = hostname
        self.exe_files = exe_files
        self.data_files = data_files
        self.cmd = cmd
        self.root = os.path.join(workspace, label + "_" + name)
        self.common_dir = common_dir
        self.proc = None
        self.stdout = None
        self.stderr = None
        self.env = env
        self.name = name
        self.out = os.path.join(self.root, "out")
        self.err = os.path.join(self.root, "err")

    def _rc(self, cmd):
        """
        Run `cmd` through the local shell and return its exit status.
        """
        LOG.info("[{}] {}".format(self.hostname, cmd))
        return subprocess.call(cmd, shell=True)

    def _run_checked(self, cmd):
        """
        Run `cmd` through the local shell and assert that it exited with
        status 0.

        The command is executed outside of the assert statement, so that it
        still runs when assertions are disabled (python -O).
        """
        rc = self._rc(cmd)
        assert rc == 0, f"[{self.hostname}] '{cmd}' exited with status {rc}"

    def _cp(self, src_path, dst_path):
        """
        Copy the file or directory `src_path` to `dst_path`. For a directory,
        a previous copy found inside `dst_path` is removed first.
        """
        if os.path.isdir(src_path):
            previous_copy = os.path.join(dst_path, os.path.basename(src_path))
            self._run_checked("rm -rf {}".format(previous_copy))
            self._run_checked("cp -r {} {}".format(src_path, dst_path))
        else:
            self._run_checked("cp {} {}".format(src_path, dst_path))

    def _setup_files(self):
        """
        Recreate the working directory, symlink the executables into it and
        copy the data files into it.
        """
        self._run_checked("rm -rf {}".format(self.root))
        self._run_checked("mkdir -p {}".format(self.root))
        for path in self.exe_files:
            dst_path = os.path.normpath(os.path.join(self.root, os.path.basename(path)))
            # The link target is made absolute, relative to the current directory
            src_path = os.path.normpath(os.path.join(os.getcwd(), path))
            self._run_checked("ln -s {} {}".format(src_path, dst_path))
        for path in self.data_files:
            dst_path = os.path.join(self.root, os.path.basename(path))
            self._cp(path, dst_path)

    def get(
        self,
        src_path,
        dst_path,
        timeout=FILE_TIMEOUT,
        target_name=None,
        pre_condition_func=lambda src_dir, _: True,
    ):
        """
        Copy `src_path` (relative to the working directory) into `dst_path`,
        as `target_name` if it is set, waiting up to `timeout` for it to exist.

        :param pre_condition_func: called with the source path and os.listdir;
            the copy only goes ahead if it returns a true value
        :raises ValueError: if the source did not appear within `timeout`
        :raises RuntimeError: if the pre-condition is not met
        """
        path = os.path.join(self.root, src_path)
        end_time = time.time() + timeout
        while time.time() < end_time:
            if os.path.exists(path):
                break
            time.sleep(0.1)
        else:
            raise ValueError(path)
        if not pre_condition_func(path, os.listdir):
            raise RuntimeError("Pre-condition for getting remote files failed")
        if target_name is not None:
            self._cp(path, os.path.join(dst_path, target_name))
        else:
            self._cp(path, dst_path)

    def list_files(self):
        """
        List the content of the working directory.
        """
        return os.listdir(self.root)

    def start(self):
        """
        Start cmd. stdout and err are captured to file locally.
        """
        # The shell form of the command is only built for the log message;
        # the process itself is spawned from the self.cmd list.
        cmd = self.get_cmd()
        LOG.info(f"[{self.hostname}] {cmd} (env: {self.env})")
        self.stdout = open(self.out, "wb")
        self.stderr = open(self.err, "wb")
        self.proc = popen(
            self.cmd,
            cwd=self.root,
            stdout=self.stdout,
            stderr=self.stderr,
            env=self.env,
        )

    def suspend(self):
        """
        Suspend the process by sending it SIGSTOP.
        """
        self.proc.send_signal(signal.SIGSTOP)

    def resume(self):
        """
        Resume the process by sending it SIGCONT.
        """
        self.proc.send_signal(signal.SIGCONT)

    def stop(self):
        """
        Terminate the process, close the capture files and scan them for
        errors.

        :return: tuple (error lines found in out, lines of err), as returned
            by log_errors()
        :raises subprocess.TimeoutExpired: if the process had not exited 10s
            after being sent SIGTERM; it is killed before the exception
            propagates
        """
        LOG.info("[{}] closing".format(self.hostname))
        try:
            if self.proc:
                self.proc.terminate()
                try:
                    self.proc.wait(10)
                except subprocess.TimeoutExpired:
                    # SIGTERM had no effect in time (for instance because the
                    # process is still suspended): do not leave it running.
                    self.proc.kill()
                    self.proc.wait()
                    raise
        finally:
            # Release the capture files whatever happened above
            if self.stdout:
                self.stdout.close()
            if self.stderr:
                self.stderr.close()
        return log_errors(self.out, self.err)

    def setup(self):
        """
        Empty the temporary directory if it exists,
        and populate it with the initial set of files.
        """
        self._setup_files()

    def get_cmd(self):
        """
        Build a shell command line equivalent to what start() runs.
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {cmd} 1> {self.out} 2> {self.err}"

    def debug_node_cmd(self):
        """
        Build the shell command line to run cmd under the debugger (DBG) from
        the working directory.
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {DBG} --args {cmd}"

    def check_done(self):
        """
        Tell whether the process has exited.
        """
        return self.proc.poll() is not None

    def get_result(self, line_count):
        """
        Extract the performance result from the last `line_count` lines of the
        out file.

        :raises ValueError: if no performance result is found in the lines
        """
        with open(self.out, "rb") as out:
            lines = out.read().splitlines()
            result = lines[-line_count:]
            return self._get_perf(result)
2019-04-26 18:27:27 +03:00
# Translation from the host_log_level given to CCFRemote to the value exported
# to the node through the OE_LOG_LEVEL environment variable (presumably the
# Open Enclave log level - TODO confirm). A level missing from this table
# leaves OE_LOG_LEVEL unset.
CCF_TO_OE_LOG_LEVEL = {
    "trace": "VERBOSE",
    "debug": "INFO",
    "info": "WARNING",
    "fail": "ERROR",
    "fatal": "FATAL",
}
def make_address(host, port=None):
    """
    Format an address as "host:port", or as `host` alone when `port` is None.
    """
    return host if port is None else f"{host}:{port}"
2019-04-26 18:27:27 +03:00
class CCFRemote(object):
    # Name of the node binary, resolved to a path in __init__
    BIN = "cchost"
    # Extra files shipped along with the executables
    DEPS = []

    def __init__(
        self,
        start_type,
        lib_path,
        local_node_id,
        host,
        pubhost,
        node_port,
        rpc_port,
        remote_class,
        enclave_type,
        workspace,
        label,
        common_dir,
        target_rpc_address=None,
        members_info=None,
        snapshot_dir=None,
        join_timer=None,
        host_log_level="info",
        sig_tx_interval=5000,
        sig_ms_interval=1000,
        raft_election_timeout=1000,
        pbft_view_change_timeout=5000,
        consensus="cft",
        worker_threads=0,
        memory_reserve_startup=0,
        gov_script=None,
        ledger_dir=None,
        read_only_ledger_dir=None,
        log_format_json=None,
        binary_dir=".",
        ledger_chunk_bytes=(5 * 1000 * 1000),
        domain=None,
        snapshot_tx_interval=None,
    ):
        """
        Run a ccf binary on a remote host.

        The constructor only assembles the command line, the environment and
        the list of files to ship, and hands them to `remote_class`; nothing
        is run until setup() and start() are called.

        :param start_type: StartType selecting the start/join/recover
            sub-command
        :param lib_path: path of the enclave file
        :param local_node_id: identifier used to name the node's files and
            its remote
        :param remote_class: class instantiated to run the command (called
            with the same positional arguments as SSHRemote/LocalRemote)
        :param members_info: for StartType.new, iterable of tuples of file
            names (relative to common_dir), one tuple per member
        :param gov_script: for StartType.new, path of the governance script
        :raises ValueError: if start_type is not a known StartType, or if
            members_info or gov_script is missing for StartType.new

        The other parameters are turned into the command line options of the
        same name.
        """
        self.start_type = start_type
        self.local_node_id = local_node_id
        self.pem = f"{local_node_id}.pem"
        self.node_address_path = f"{local_node_id}.node_address"
        self.rpc_address_path = f"{local_node_id}.rpc_address"
        self.BIN = infra.path.build_bin_path(
            self.BIN, enclave_type, binary_dir=binary_dir
        )
        self.common_dir = common_dir
        self.ledger_dir = os.path.normpath(ledger_dir) if ledger_dir else None
        self.ledger_dir_name = (
            os.path.basename(self.ledger_dir)
            if self.ledger_dir
            else f"{local_node_id}.ledger"
        )
        self.snapshot_dir = os.path.normpath(snapshot_dir) if snapshot_dir else None
        self.snapshot_dir_name = (
            os.path.basename(self.snapshot_dir) if self.snapshot_dir else "snapshots"
        )
        # Normalised like ledger_dir and snapshot_dir, so that a trailing
        # separator does not yield an empty directory name
        if read_only_ledger_dir:
            read_only_ledger_dir = os.path.normpath(read_only_ledger_dir)
        self.read_ledger_dir_name = (
            os.path.basename(read_only_ledger_dir) if read_only_ledger_dir else None
        )

        exe_files = [self.BIN, lib_path] + self.DEPS
        data_files = [self.ledger_dir] if self.ledger_dir else []
        data_files += [self.snapshot_dir] if self.snapshot_dir else []

        # exe_files may be relative or absolute. The remote implementation should
        # copy (or symlink) to the target workspace, and then node will be able
        # to reference the destination file locally in the target workspace.
        bin_path = os.path.join(".", os.path.basename(self.BIN))
        enclave_path = os.path.join(".", os.path.basename(lib_path))

        election_timeout_arg = (
            f"--pbft_view-change-timeout-ms={pbft_view_change_timeout}"
            if consensus == "bft"
            else f"--raft-election-timeout-ms={raft_election_timeout}"
        )

        cmd = [
            bin_path,
            f"--enclave-file={enclave_path}",
            f"--enclave-type={enclave_type}",
            f"--node-address={make_address(host, node_port)}",
            f"--node-address-file={self.node_address_path}",
            f"--rpc-address={make_address(host, rpc_port)}",
            f"--rpc-address-file={self.rpc_address_path}",
            f"--public-rpc-address={make_address(pubhost, rpc_port)}",
            f"--ledger-dir={self.ledger_dir_name}",
            f"--node-cert-file={self.pem}",
            f"--host-log-level={host_log_level}",
            election_timeout_arg,
            f"--consensus={consensus}",
            f"--worker-threads={worker_threads}",
        ]

        # Optional settings: the option is only passed for a true value
        if log_format_json:
            cmd += ["--log-format-json"]

        if sig_tx_interval:
            cmd += [f"--sig-tx-interval={sig_tx_interval}"]

        if sig_ms_interval:
            cmd += [f"--sig-ms-interval={sig_ms_interval}"]

        if memory_reserve_startup:
            cmd += [f"--memory-reserve-startup={memory_reserve_startup}"]

        if ledger_chunk_bytes:
            cmd += [f"--ledger-chunk-bytes={ledger_chunk_bytes}"]

        if domain:
            cmd += [f"--domain={domain}"]

        if snapshot_tx_interval:
            cmd += [f"--snapshot-tx-interval={snapshot_tx_interval}"]

        if read_only_ledger_dir:
            cmd += [f"--read-only-ledger-dir={self.read_ledger_dir_name}"]
            data_files += [os.path.join(self.common_dir, read_only_ledger_dir)]

        # Sub-command and its own options come last on the command line
        if start_type == StartType.new:
            # Validate up front, so that a missing argument is reported
            # clearly rather than as a TypeError from the path handling below
            if members_info is None:
                raise ValueError(
                    "Starting node should be given at least one tuple of (member certificate, member public encryption key[, member data])"
                )
            if gov_script is None:
                raise ValueError("Starting node should be given a governance script")
            cmd += [
                "start",
                "--network-cert-file=networkcert.pem",
                f"--gov-script={os.path.basename(gov_script)}",
            ]
            for mi in members_info:
                for mf in mi:
                    data_files.append(os.path.join(self.common_dir, mf))
                cmd += [f"--member-info={','.join(mi)}"]
            # NOTE(review): unlike the other data files, this path is built
            # from the basename of common_dir - confirm this is intended
            data_files += [os.path.join(os.path.basename(self.common_dir), gov_script)]

        elif start_type == StartType.join:
            # NOTE(review): target_rpc_address and join_timer are formatted
            # as "None" when left unset - confirm callers always provide them
            cmd += [
                "join",
                "--network-cert-file=networkcert.pem",
                f"--target-rpc-address={target_rpc_address}",
                f"--join-timer={join_timer}",
            ]
            data_files += [os.path.join(self.common_dir, "networkcert.pem")]

        elif start_type == StartType.recover:
            cmd += ["recover", "--network-cert-file=networkcert.pem"]

        else:
            raise ValueError(
                f"Unexpected CCFRemote start type {start_type}. Should be start, join or recover"
            )

        env = {}
        # Name of the coverage profile written by the node, if any; it is
        # fetched back in stop()
        self.profraw = None
        if enclave_type == "virtual":
            env["UBSAN_OPTIONS"] = "print_stacktrace=1"
            if coverage_enabled(lib_path):
                self.profraw = f"{uuid.uuid4()}-{local_node_id}_{os.path.basename(lib_path)}.profraw"
                env["LLVM_PROFILE_FILE"] = self.profraw

        oe_log_level = CCF_TO_OE_LOG_LEVEL.get(host_log_level)
        if oe_log_level:
            env["OE_LOG_LEVEL"] = oe_log_level

        self.remote = remote_class(
            local_node_id,
            host,
            exe_files,
            data_files,
            cmd,
            workspace,
            label,
            common_dir,
            env,
            log_format_json,
        )

    def setup(self):
        """
        Prepare the working directory of the underlying remote.
        """
        self.remote.setup()

    def start(self):
        """
        Start the node on the underlying remote.
        """
        self.remote.start()

    def suspend(self):
        """
        Suspend the node; returns whatever the underlying remote returns.
        """
        return self.remote.suspend()

    def resume(self):
        """
        Resume the node after a suspension.
        """
        self.remote.resume()

    def get_startup_files(self, dst_path):
        """
        Fetch into `dst_path` the files produced by the node at startup: its
        certificate and address files and, for a node started with
        StartType.new or StartType.recover, the network certificate and
        network encryption public key.
        """
        self.remote.get(self.pem, dst_path)
        self.remote.get(self.node_address_path, dst_path)
        self.remote.get(self.rpc_address_path, dst_path)
        if self.start_type in {StartType.new, StartType.recover}:
            self.remote.get("networkcert.pem", dst_path)
            self.remote.get("network_enc_pubk.pem", dst_path)

    def debug_node_cmd(self):
        """
        Return the command line to run the node under a debugger.
        """
        return self.remote.debug_node_cmd()

    def stop(self):
        """
        Stop the node and, if coverage is enabled, fetch its profile into
        common_dir. Failures are logged, not raised.

        :return: tuple (errors, fatal errors) reported by the underlying
            remote, or two empty lists if it could not be stopped cleanly
        """
        errors, fatal_errors = [], []
        try:
            errors, fatal_errors = self.remote.stop()
        except Exception:
            LOG.exception("Failed to shut down {} cleanly".format(self.local_node_id))
        if self.profraw:
            try:
                self.remote.get(self.profraw, self.common_dir)
            except Exception:
                LOG.info(f"Could not retrieve {self.profraw}")
        return errors, fatal_errors

    def check_done(self):
        """
        Tell whether the node process has exited.
        """
        return self.remote.check_done()

    def set_perf(self):
        """
        Make the node run under `perf record`.
        """
        self.remote.set_perf()

    def get_ledger(self):
        """
        Fetch the ledger directory into common_dir and return the path of the
        local copy.
        """
        self.remote.get(self.ledger_dir_name, self.common_dir)
        return os.path.join(self.common_dir, self.ledger_dir_name)

    def get_committed_snapshots(self, pre_condition_func=lambda src_dir, _: True):
        """
        Fetch the snapshot directory into common_dir and return the path of
        the local copy.

        :param pre_condition_func: forwarded to the underlying remote's get()
        """
        self.remote.get(
            self.snapshot_dir_name,
            self.common_dir,
            pre_condition_func=pre_condition_func,
        )
        return os.path.join(self.common_dir, self.snapshot_dir_name)

    def ledger_path(self):
        """
        Return the path of the ledger directory inside the remote's working
        directory.
        """
        return os.path.join(self.remote.root, self.ledger_dir_name)
2019-04-26 18:27:27 +03:00
@contextmanager
def ccf_remote(*args, **kwargs):
    """
    Context manager around CCFRemote: the node is built from the given
    arguments, set up and started on entry, and stopped on exit (also when
    setup or start failed).
    """
    node = CCFRemote(*args, **kwargs)
    try:
        node.setup()
        node.start()
        yield node
    finally:
        node.stop()
2019-08-28 12:57:45 +03:00
class StartType(Enum):
    """
    How a CCFRemote node is launched; selects the sub-command appended to its
    command line.
    """

    # "start" sub-command
    new = 0
    # "join" sub-command
    join = 1
    # "recover" sub-command
    recover = 2