2019-04-26 18:27:27 +03:00
|
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
# Licensed under the Apache 2.0 License.
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
from enum import Enum
|
|
|
|
import paramiko
|
|
|
|
import logging
|
|
|
|
import subprocess
|
|
|
|
import getpass
|
|
|
|
from contextlib import contextmanager
|
|
|
|
import infra.path
|
|
|
|
import json
|
2019-05-29 12:42:17 +03:00
|
|
|
import uuid
|
2019-06-14 12:56:35 +03:00
|
|
|
import ctypes
|
|
|
|
import signal
|
2019-08-08 18:11:46 +03:00
|
|
|
import re
|
2019-08-19 17:40:15 +03:00
|
|
|
from collections import deque
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
from loguru import logger as LOG
|
|
|
|
|
|
|
|
DBG = os.getenv("DBG", "cgdb")
|
|
|
|
|
2019-06-14 12:56:35 +03:00
|
|
|
_libc = ctypes.CDLL("libc.so.6")
|
|
|
|
|
|
|
|
|
|
|
|
def _term_on_pdeathsig():
    """
    Call prctl(PR_SET_PDEATHSIG, SIGTERM) in the current process.

    Used as the preexec_fn of processes spawned through popen().
    """
    # /usr/include/linux/prctl.h: #define PR_SET_PDEATHSIG 1
    pr_set_pdeathsig = 1
    _libc.prctl(pr_set_pdeathsig, signal.SIGTERM)
|
|
|
|
|
|
|
|
|
|
|
|
def popen(*args, **kwargs):
    """
    Thin wrapper around subprocess.Popen that always installs
    _term_on_pdeathsig as the child's preexec_fn.

    All positional and keyword arguments are forwarded unchanged, except that
    any caller-supplied preexec_fn is overridden.
    """
    popen_kwargs = dict(kwargs, preexec_fn=_term_on_pdeathsig)
    return subprocess.Popen(*args, **popen_kwargs)
|
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
|
2019-06-25 17:37:05 +03:00
|
|
|
def coverage_enabled(bin):
    """
    Report whether the binary at `bin` exposes the __llvm_coverage_mapping symbol.

    The demangled symbol table printed by `nm -C` is searched for the symbol
    name. `nm` is invoked with an argument list rather than through a shell
    pipeline, so paths containing spaces or shell metacharacters are handled
    correctly and cannot inject commands.

    :param bin: path of the binary to inspect
    :return: True if the symbol is listed, False otherwise (including when
        `nm` is unavailable or cannot read the file)
    """
    try:
        # "--" stops option parsing so a path starting with "-" is not
        # mistaken for an nm flag.
        listing = subprocess.run(["nm", "-C", "--", bin], stdout=subprocess.PIPE)
    except OSError:
        # nm could not be executed at all: no evidence of coverage.
        return False
    return b"__llvm_coverage_mapping" in listing.stdout
|
|
|
|
|
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
@contextmanager
def sftp_session(hostname):
    """
    Context manager yielding an SFTP session on a fresh SSH connection to `hostname`.

    Unknown host keys are accepted automatically (AutoAddPolicy). Both the
    SFTP session and the underlying SSH client are closed on exit, including
    when connecting or opening the session fails.
    """
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # connect() is inside the try so that a failed connection attempt
        # still goes through client.close().
        client.connect(hostname)
        session = client.open_sftp()
        try:
            yield session
        finally:
            session.close()
    finally:
        client.close()
|
|
|
|
|
|
|
|
|
|
|
|
def log_errors(out_path, err_path):
    """
    Scan a node's captured stdout for failure lines and log them.

    Every line of `out_path` containing "[fail ]" or "[fatal]" is logged as an
    error and collected. If at least one such line is found, the last ten
    lines of `out_path` are logged for context, followed by the full contents
    of `err_path`.

    Unreadable files are logged and otherwise ignored; undecodable bytes in
    either file are replaced rather than raising.

    :param out_path: path of the captured stdout file
    :param err_path: path of the captured stderr file
    :return: list of the matching (right-stripped) lines, empty if none
    """
    error_filter = ["[fail ]", "[fatal]"]
    error_lines = []
    try:
        tail_lines = deque(maxlen=10)
        with open(out_path, "r", errors="replace") as lines:
            for line in lines:
                stripped_line = line.rstrip()
                tail_lines.append(stripped_line)
                if any(x in stripped_line for x in error_filter):
                    LOG.error("{}: {}".format(out_path, stripped_line))
                    error_lines.append(stripped_line)
        if error_lines:
            LOG.info(
                "{} errors found, printing end of output for context:",
                len(error_lines),
            )
            for line in tail_lines:
                LOG.info(line)
            try:
                # Same decoding policy as the out file: a stray byte in the
                # err output must not abort error reporting.
                with open(err_path, "r", errors="replace") as lines:
                    LOG.error("contents of {}:".format(err_path))
                    LOG.error(lines.read())
            except IOError:
                LOG.exception("Could not read err output {}".format(err_path))
    except IOError:
        LOG.exception("Could not check output {} for errors".format(out_path))
    return error_lines
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
|
|
|
|
class CmdMixin(object):
    """
    Helpers shared by the remote classes; expects the host class to provide
    a `cmd` list attribute.
    """

    def set_perf(self):
        """
        Prefix self.cmd with a `perf record` invocation.
        """
        perf_prefix = ["perf", "record", "--freq=1000", "--call-graph=dwarf", "-s"]
        self.cmd = perf_prefix + self.cmd

    def _print_upload_perf(self, name, metrics, lines):
        """
        Log each output line and, when `metrics` is given, record throughput.

        `lines` is an iterable of bytes. Every line matching "=> <value>tx/s"
        has its value converted to float, logged, and passed to
        metrics.put(name, value).
        """
        for raw_line in lines:
            text = raw_line.decode()
            LOG.debug(text)
            if metrics is None:
                continue
            found = re.search("=> (.*)tx/s", text)
            if not found:
                continue
            throughput = float(found.group(1))
            LOG.success(f"METRICS: {name} = {throughput}")
            metrics.put(name, throughput)
|
2019-08-08 18:11:46 +03:00
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
class SSHRemote(CmdMixin):
    def __init__(
        self,
        name,
        hostname,
        exe_files,
        data_files,
        cmd,
        workspace,
        label,
        env=None,
        json_log_path=None,
    ):
        """
        Runs a command on a remote host, through an SSH connection. A temporary
        directory is created, and some files can be shipped over. The command is
        run out of that directory.

        Note that the name matters, since the temporary directory that will be first
        deleted, then created and populated is workspace/label_name. There is deliberately no
        cleanup on shutdown, to make debugging/inspection possible.

        setup() connects, creates the directory and ships over the files
        start() runs the specified command
        stop() disconnects, which shuts down the command via SIGHUP

        json_log_path is accepted but not used by this class.
        """
        self.hostname = hostname
        # For SSHRemote, both executable files (host and enclave) and data
        # files (ledger, secrets) are copied to the remote.
        # A new list is built so the caller's exe_files is not extended in place.
        self.files = list(exe_files) + list(data_files)
        self.cmd = cmd
        self.client = paramiko.SSHClient()
        # this client (proc_client) is used to execute commands on the remote host since the main client uses pty
        self.proc_client = paramiko.SSHClient()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.proc_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.root = os.path.join(workspace, label + "_" + name)
        self.name = name
        self.env = env or {}
        self.out = os.path.join(self.root, "out")
        self.err = os.path.join(self.root, "err")
        self.suspension_proc = None

    def _rc(self, cmd):
        """
        Run `cmd` on the remote host through the main client and return its exit status.
        """
        LOG.info("[{}] {}".format(self.hostname, cmd))
        _, stdout, _ = self.client.exec_command(cmd)
        return stdout.channel.recv_exit_status()

    def _connect(self):
        """
        Connect both SSH clients to the remote host.
        """
        LOG.debug("[{}] connect".format(self.hostname))
        self.client.connect(self.hostname)
        self.proc_client.connect(self.hostname)

    def _setup_files(self):
        """
        Recreate the remote root directory, upload self.files into it and
        mark the command's executable as executable.
        """
        assert self._rc("rm -rf {}".format(self.root)) == 0
        assert self._rc("mkdir -p {}".format(self.root)) == 0
        session = self.client.open_sftp()
        try:
            for path in self.files:
                tgt_path = os.path.join(self.root, os.path.basename(path))
                LOG.info("[{}] copy {} from {}".format(self.hostname, tgt_path, path))
                session.put(path, tgt_path)
        finally:
            # Release the SFTP channel even when an upload fails.
            session.close()
        executable = self.cmd[0]
        if executable.startswith("./"):
            executable = executable[2:]
        assert self._rc("chmod +x {}".format(os.path.join(self.root, executable))) == 0

    def get(self, filename, timeout=60, targetname=None):
        """
        Get file called `filename` under the root of the remote. If the
        file is missing, wait for timeout, and raise an exception.

        If the file is present, it is copied to the CWD on the caller's
        host, as `targetname` if it is set.

        This call spins up a separate client because we don't want to interrupt
        the main cmd that may be running.

        Raises ValueError(filename) if the file could not be fetched within
        `timeout` attempts, one second apart.
        """
        targetname = targetname or filename
        remote_path = os.path.join(self.root, filename)
        with sftp_session(self.hostname) as session:
            for seconds in range(timeout):
                try:
                    session.get(remote_path, targetname)
                    LOG.debug(
                        "[{}] found {} after {}s".format(
                            self.hostname, filename, seconds
                        )
                    )
                    break
                except Exception:
                    # Deliberate best-effort polling: any failure is retried
                    # until the attempts run out.
                    time.sleep(1)
            else:
                raise ValueError(filename)

    def list_files(self, timeout=60):
        """
        Return the names of the entries under the remote root directory.

        Listing is retried once per second; ValueError(self.root) is raised
        after `timeout` failed attempts.
        """
        files = []
        with sftp_session(self.hostname) as session:
            for _ in range(timeout):
                try:
                    files = session.listdir(self.root)
                    break
                except Exception:
                    time.sleep(1)
            else:
                raise ValueError(self.root)
        return files

    def get_logs(self):
        """
        Download the remote err and out files into the CWD as
        <hostname>_<err|out>_<name>. Failures are logged as warnings only.
        """
        with sftp_session(self.hostname) as session:
            for filepath in (self.err, self.out):
                try:
                    local_filepath = "{}_{}_{}".format(
                        self.hostname, os.path.basename(filepath), self.name
                    )
                    session.get(filepath, local_filepath)
                    LOG.info("Downloaded {}".format(local_filepath))
                except Exception:
                    LOG.warning(
                        "Failed to download {} from {}".format(filepath, self.hostname)
                    )

    def start(self):
        """
        Start cmd on the remote host. stdout and err are redirected to the
        out and err files under the remote root.

        We create a pty on the remote host under which to run the command, so as to
        get a SIGHUP on disconnection.
        """
        cmd = self._cmd()
        LOG.info("[{}] {}".format(self.hostname, cmd))
        # The returned channel files are bound for the duration of this call,
        # exactly as before; they are not otherwise read.
        stdin, stdout, stderr = self.client.exec_command(cmd, get_pty=True)
        # Look up the pid through the second client; it is used by suspend()/resume().
        _, stdout_, _ = self.proc_client.exec_command(f'ps -ef | pgrep -f "{cmd}"')
        self.pid = stdout_.readline()

    def suspend(self):
        """
        Send SIGSTOP to the remote process. Raises RuntimeError on failure.
        """
        _, stdout, _ = self.proc_client.exec_command(f"kill -STOP {self.pid}")
        if stdout.channel.recv_exit_status() == 0:
            LOG.info(f"Node {self.name} suspended...")
        else:
            raise RuntimeError(f"Node {self.name} could not be suspended")

    def resume(self):
        """
        Send SIGCONT to the remote process. Raises RuntimeError on failure.
        """
        _, stdout, _ = self.proc_client.exec_command(f"kill -CONT {self.pid}")
        if stdout.channel.recv_exit_status() != 0:
            raise RuntimeError(f"Could not resume node {self.name} from suspension!")
        LOG.info(f"Node {self.name} resuming from suspension...")

    def stop(self):
        """
        Disconnect the client, and therefore shut down the command as well.

        The remote logs are downloaded and scanned first; the list of error
        lines found by log_errors() is returned. Both SSH clients are closed
        even if retrieving or scanning the logs raises.
        """
        LOG.info("[{}] closing".format(self.hostname))
        try:
            self.get_logs()
            errors = log_errors(
                "{}_out_{}".format(self.hostname, self.name),
                "{}_err_{}".format(self.hostname, self.name),
            )
        finally:
            self.client.close()
            self.proc_client.close()
        return errors

    def setup(self):
        """
        Connect to the remote host, empty the temporary directory if it exists,
        and populate it with the initial set of files.
        """
        self._connect()
        self._setup_files()

    def _cmd(self):
        """
        Build the shell line that runs the command from the remote root with
        self.env exported, stdout/stderr redirected and stdin detached.
        """
        env = " ".join(f"{key}={value}" for key, value in self.env.items())
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {env} ./{cmd} 1>{self.out} 2>{self.err} 0</dev/null"

    def _dbg(self):
        """
        Build the shell line that runs the command under the DBG debugger.
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {DBG} --args {cmd}"

    def _connect_new(self):
        """
        Open and return a fresh SSH client connected to the remote host.
        The caller is responsible for closing it.
        """
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(self.hostname)
        return client

    def wait_for_stdout_line(self, line, timeout):
        """
        Block until `line` appears in the remote out file.

        Raises ValueError if it is not found after `timeout` attempts.
        """
        if not self.check_for_stdout_line(line, timeout):
            raise ValueError(f"{line} not found in stdout after {timeout} seconds")

    def check_for_stdout_line(self, line, timeout):
        """
        Poll the remote out file once per second, up to `timeout` times, for
        a fixed-string match of `line`. Returns True if found, False otherwise.
        """
        client = self._connect_new()
        try:
            for _ in range(timeout):
                _, stdout, _ = client.exec_command(f"grep -F '{line}' {self.out}")
                if stdout.channel.recv_exit_status() == 0:
                    return True
                time.sleep(1)
            return False
        finally:
            # The polling connection is closed on every exit path.
            client.close()

    def print_and_upload_result(self, name, metrics, lines):
        """
        Fetch the last `lines` lines of the remote out file and hand them to
        _print_upload_perf(). Nothing is reported if `tail` fails.
        """
        client = self._connect_new()
        try:
            _, stdout, _ = client.exec_command(f"tail -{lines} {self.out}")
            if stdout.channel.recv_exit_status() == 0:
                LOG.success(f"Result for {self.name}, uploaded under {name}:")
                self._print_upload_perf(name, metrics, stdout.read().splitlines())
                return
        finally:
            client.close()
|
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
@contextmanager
def ssh_remote(
    name,
    hostname,
    files,
    cmd,
    data_files=None,
    workspace=None,
    label=None,
    env=None,
):
    """
    Context Manager wrapper for SSHRemote

    The remote is set up and started on entry and stopped on exit.

    `files` is passed to SSHRemote as its executable files and `data_files`
    (default: none) as its data files. `workspace` and `label` are required
    by SSHRemote to derive the remote directory; the previous four-argument
    call could never construct an SSHRemote, so a TypeError is raised
    explicitly when they are missing.
    """
    if workspace is None or label is None:
        raise TypeError(
            "ssh_remote() requires 'workspace' and 'label' to construct an SSHRemote"
        )
    remote = SSHRemote(
        name,
        hostname,
        list(files),
        list(data_files or []),
        cmd,
        workspace,
        label,
        env,
    )
    try:
        remote.setup()
        remote.start()
        yield remote
    finally:
        remote.stop()
|
|
|
|
|
|
|
|
|
|
|
|
class LocalRemote(CmdMixin):
    def __init__(
        self,
        name,
        hostname,
        exe_files,
        data_files,
        cmd,
        workspace,
        label,
        env=None,
        json_log_path=None,
    ):
        """
        Local Equivalent to the SSHRemote

        The command runs as a child process out of workspace/label_name.
        Executable files are symlinked into that directory and data files are
        copied. json_log_path is accepted but not used by this class.
        """
        self.hostname = hostname
        self.exe_files = exe_files
        self.data_files = data_files
        self.cmd = cmd
        self.root = os.path.join(workspace, label + "_" + name)
        self.proc = None
        self.stdout = None
        self.stderr = None
        self.env = env
        self.name = name
        self.out = os.path.join(self.root, "out")
        self.err = os.path.join(self.root, "err")

    def _rc(self, cmd):
        """
        Run `cmd` through the local shell and return its exit status.
        """
        LOG.info("[{}] {}".format(self.hostname, cmd))
        return subprocess.call(cmd, shell=True)

    def _close_logs(self):
        """
        Close the stdout/stderr capture files if they were opened.
        """
        for handle in (self.stdout, self.stderr):
            if handle:
                handle.close()

    def _setup_files(self):
        """
        Recreate the root directory, symlink the executables and copy the
        data files into it.
        """
        assert self._rc("rm -rf {}".format(self.root)) == 0
        assert self._rc("mkdir -p {}".format(self.root)) == 0
        for path in self.exe_files:
            dst_path = os.path.join(self.root, os.path.basename(path))
            src_path = os.path.join(os.getcwd(), path)
            assert self._rc("ln -s {} {}".format(src_path, dst_path)) == 0
        for path in self.data_files:
            dst_path = self.root
            src_path = os.path.join(os.getcwd(), path)
            assert self._rc("cp {} {}".format(src_path, dst_path)) == 0

        # Make sure relative paths include current directory. Absolute paths will be unaffected
        self.cmd[0] = os.path.join(".", os.path.normpath(self.cmd[0]))

    def get(self, filename, timeout=60, targetname=None):
        """
        Copy `filename` from the root directory to `targetname` (default:
        `filename`) relative to the CWD.

        The file is polled for once per second; ValueError(path) is raised if
        it has not appeared after `timeout` attempts.
        """
        path = os.path.join(self.root, filename)
        for _ in range(timeout):
            if os.path.exists(path):
                break
            time.sleep(1)
        else:
            raise ValueError(path)
        targetname = targetname or filename
        assert self._rc("cp {} {}".format(path, targetname)) == 0

    def list_files(self, timeout=60):
        """
        Return the names of the entries under the root directory.

        `timeout` is accepted for signature parity with SSHRemote.list_files
        and is ignored.
        """
        return os.listdir(self.root)

    def start(self, timeout=10):
        """
        Start cmd. stdout and err are captured to file locally.

        `timeout` is currently unused. If the process cannot be spawned, the
        capture files opened so far are closed before the error propagates.
        """
        cmd = self._cmd()
        LOG.info(f"[{self.hostname}] {cmd} (env: {self.env})")
        self.stdout = open(self.out, "wb")
        try:
            self.stderr = open(self.err, "wb")
            self.proc = popen(
                self.cmd,
                cwd=self.root,
                stdout=self.stdout,
                stderr=self.stderr,
                env=self.env,
            )
        except Exception:
            self._close_logs()
            raise

    def suspend(self):
        """
        Send SIGSTOP to the child process.
        """
        self.proc.send_signal(signal.SIGSTOP)
        LOG.info(f"Node {self.name} suspended...")

    def resume(self):
        """
        Send SIGCONT to the child process.
        """
        self.proc.send_signal(signal.SIGCONT)
        LOG.info(f"Node {self.name} resuming from suspension...")

    def stop(self):
        """
        Terminate the child process and wait for it to exit.

        The capture files are closed on every path, including when
        terminating or waiting raises. Returns the error lines found by
        log_errors(), or an empty list if the process was never started.
        """
        LOG.info("[{}] closing".format(self.hostname))
        if not self.proc:
            self._close_logs()
            return []
        try:
            self.proc.terminate()
            self.proc.wait()
        finally:
            self._close_logs()
        return log_errors(self.out, self.err)

    def setup(self):
        """
        Empty the temporary directory if it exists,
        and populate it with the initial set of files.
        """
        self._setup_files()

    def _cmd(self):
        """
        Build a shell-style description of the command (used for logging).
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {cmd} 1>{self.out} 2>{self.err}"

    def _dbg(self):
        """
        Build the shell line that runs the command under the DBG debugger.
        """
        cmd = " ".join(self.cmd)
        return f"cd {self.root} && {DBG} --args {cmd}"

    def wait_for_stdout_line(self, line, timeout):
        """
        Block until `line` appears in the captured stdout.

        Raises ValueError if it is not found after `timeout` attempts.
        """
        if not self.check_for_stdout_line(line, timeout):
            raise ValueError(
                "{} not found in stdout after {} seconds".format(line, timeout)
            )

    def check_for_stdout_line(self, line, timeout):
        """
        Re-read the captured stdout once per second, up to `timeout` times,
        looking for `line` as a substring. Returns True if found, False otherwise.
        """
        for _ in range(timeout):
            with open(self.out, "rb") as out:
                for out_line in out:
                    # Undecodable bytes are replaced (as log_errors does) so
                    # stray binary output cannot abort the polling.
                    if line in out_line.decode(errors="replace"):
                        return True
            time.sleep(1)
        return False

    def print_and_upload_result(self, name, metrics, line):
        """
        Hand the last `line` lines of the captured stdout to _print_upload_perf().
        """
        with open(self.out, "rb") as out:
            lines = out.read().splitlines()
            result = lines[-line:]
            LOG.success(f"Result for {self.name}, uploaded under {name}:")
            self._print_upload_perf(name, metrics, result)
|
2019-06-07 18:35:22 +03:00
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
|
2019-05-24 15:46:17 +03:00
|
|
|
# Translation from a CCF host log level name to the value exported as
# OE_LOG_LEVEL in the node's environment (see CCFRemote.__init__).
CCF_TO_OE_LOG_LEVEL = dict(
    trace="VERBOSE",
    debug="INFO",
    info="WARNING",
    fail="ERROR",
    fatal="FATAL",
)
|
|
|
|
|
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
class CCFRemote(object):
    """
    Builds the command line, environment and file set of a CCF node and
    delegates running it to an instance of `remote_class`.
    """

    BIN = "cchost"
    DEPS = []

    def __init__(
        self,
        start_type,
        lib_path,
        local_node_id,
        host,
        pubhost,
        node_port,
        rpc_port,
        remote_class,
        enclave_type,
        workspace,
        label,
        target_rpc_address=None,
        members_certs=None,
        host_log_level="info",
        ignore_quote=False,
        sig_max_tx=1000,
        sig_max_ms=1000,
        election_timeout=1000,
        consensus="raft",
        memory_reserve_startup=0,
        notify_server=None,
        gov_script=None,
        ledger_file=None,
        sealed_secrets=None,
        json_log_path=None,
    ):
        """
        Run a ccf binary on a remote host.

        `start_type` is a StartType member selecting the start, join or
        recover sub-command. `remote_class` is instantiated with the node id,
        host, executable files, data files, command, workspace, label,
        environment and json_log_path.

        Raises ValueError if `notify_server` is not of the form host:port, if
        a new network is started without any member certificate, or if
        `start_type` is not a known StartType.
        """
        self.start_type = start_type
        self.local_node_id = local_node_id
        self.host = host
        self.pubhost = pubhost
        self.node_port = node_port
        self.rpc_port = rpc_port
        self.pem = "{}.pem".format(local_node_id)
        self.quote = None
        # Only expect a quote if the enclave is not virtual and quotes have
        # not been explicitly ignored
        if enclave_type != "virtual" and not ignore_quote:
            self.quote = f"quote{local_node_id}.bin"
        self.BIN = infra.path.build_bin_path(self.BIN, enclave_type)
        self.ledger_file = ledger_file
        self.ledger_file_name = (
            os.path.basename(ledger_file) if ledger_file else f"{local_node_id}.ledger"
        )

        exe_files = [self.BIN, lib_path] + self.DEPS
        data_files = ([self.ledger_file] if self.ledger_file else []) + (
            [sealed_secrets] if sealed_secrets else []
        )

        cmd = [
            self.BIN,
            f"--enclave-file=./{os.path.basename(lib_path)}",
            f"--enclave-type={enclave_type}",
            f"--node-address={host}:{node_port}",
            f"--rpc-address={host}:{rpc_port}",
            # Advertise the public host when one is given; fall back to the
            # listening host otherwise. Previously pubhost was never used.
            f"--public-rpc-address={pubhost or host}:{rpc_port}",
            f"--ledger-file={self.ledger_file_name}",
            f"--node-cert-file={self.pem}",
            f"--host-log-level={host_log_level}",
            f"--raft-election-timeout-ms={election_timeout}",
            f"--consensus={consensus}",
        ]

        if json_log_path:
            log_file = f"{label}_{local_node_id}"
            cmd += [f"--json-log-path={os.path.join(json_log_path, log_file)}"]

        if sig_max_tx:
            cmd += [f"--sig-max-tx={sig_max_tx}"]

        if sig_max_ms:
            cmd += [f"--sig-max-ms={sig_max_ms}"]

        if memory_reserve_startup:
            cmd += [f"--memory-reserve-startup={memory_reserve_startup}"]

        if notify_server:
            notify_server_host, *notify_server_port = notify_server.split(":")

            if not notify_server_host or not (
                notify_server_port and notify_server_port[0]
            ):
                raise ValueError(
                    "Notification server host:port configuration is invalid"
                )

            cmd += [
                f"--notify-server-address={notify_server_host}:{notify_server_port[0]}"
            ]

        if self.quote:
            cmd += [f"--quote-file={self.quote}"]

        if start_type == StartType.new:
            cmd += [
                "start",
                "--network-cert-file=networkcert.pem",
                f"--gov-script={os.path.basename(gov_script)}",
            ]
            # An empty collection is rejected too: the node needs at least
            # one member certificate.
            if not members_certs:
                raise ValueError(
                    "Starting node should be given at least one member certificate"
                )
            cmd.extend(f"--member-cert={mc}" for mc in members_certs)
            data_files.extend(members_certs)
            data_files += [os.path.basename(gov_script)]
        elif start_type == StartType.join:
            cmd += [
                "join",
                "--network-cert-file=networkcert.pem",
                f"--target-rpc-address={target_rpc_address}",
            ]
            data_files += ["networkcert.pem"]
        elif start_type == StartType.recover:
            cmd += ["recover", "--network-cert-file=networkcert.pem"]
        else:
            raise ValueError(
                f"Unexpected CCFRemote start type {start_type}. Should be start, join or recover"
            )

        # Necessary for the az-dcap-client >=1.1 (https://github.com/microsoft/Azure-DCAP-Client/issues/84)
        env = {"HOME": os.environ["HOME"]}
        self.profraw = None
        if enclave_type == "virtual":
            env["UBSAN_OPTIONS"] = "print_stacktrace=1"
            if coverage_enabled(lib_path):
                self.profraw = f"{uuid.uuid4()}-{local_node_id}_{os.path.basename(lib_path)}.profraw"
                env["LLVM_PROFILE_FILE"] = self.profraw

        oe_log_level = CCF_TO_OE_LOG_LEVEL.get(host_log_level)
        if oe_log_level:
            env["OE_LOG_LEVEL"] = oe_log_level

        self.remote = remote_class(
            local_node_id,
            host,
            exe_files,
            data_files,
            cmd,
            workspace,
            label,
            env,
            json_log_path,
        )

    def setup(self):
        """Prepare the underlying remote (directory and files)."""
        self.remote.setup()

    def start(self):
        """Start the node on the underlying remote."""
        self.remote.start()

    def suspend(self):
        """Suspend the node process; returns whatever the remote's suspend() returns."""
        return self.remote.suspend()

    def resume(self):
        """Resume a previously suspended node process."""
        self.remote.resume()

    def get_startup_files(self):
        """
        Fetch the node certificate, plus networkcert.pem when the node was
        started with StartType.new or StartType.recover.
        """
        self.remote.get(self.pem)
        if self.start_type in {StartType.new, StartType.recover}:
            self.remote.get("networkcert.pem")

    def debug_node_cmd(self):
        """Return the command line that runs the node under the debugger."""
        return self.remote._dbg()

    def stop(self):
        """
        Stop the underlying remote and, when coverage is enabled, fetch the
        profraw file.

        Failures are logged rather than raised. Returns the error lines
        reported by the remote's stop(), or an empty list if stopping failed.
        """
        errors = []
        try:
            errors = self.remote.stop()
        except Exception:
            LOG.exception("Failed to shut down {} cleanly".format(self.local_node_id))
        if self.profraw:
            try:
                self.remote.get(self.profraw)
            except Exception:
                LOG.info(f"Could not retrieve {self.profraw}")
        return errors

    def wait_for_stdout_line(self, line, timeout=5):
        """Wait for `line` to appear in the node's stdout (see the remote's method)."""
        return self.remote.wait_for_stdout_line(line, timeout)

    def print_and_upload_result(self, name, metrics, lines):
        """Report the last `lines` lines of the node's output through the remote."""
        self.remote.print_and_upload_result(name, metrics, lines)

    def set_perf(self):
        """Run the node under `perf record` (see CmdMixin.set_perf)."""
        self.remote.set_perf()

    def get_sealed_secrets(self):
        """
        Fetch the sealed secrets file whose name sorts last among the
        "sealed_secrets.*" files on the remote, and return its name.

        Raises IndexError if the remote holds no such file.
        """
        sealed_secrets_files = [
            f for f in self.remote.list_files() if f.startswith("sealed_secrets.")
        ]

        latest_sealed_secrets = sorted(sealed_secrets_files, reverse=True)[0]
        self.remote.get(latest_sealed_secrets)

        return latest_sealed_secrets

    def get_ledger(self):
        """Fetch the ledger file from the remote and return its name."""
        self.remote.get(self.ledger_file_name)
        return self.ledger_file_name

    def ledger_path(self):
        """Return the path of the ledger file under the remote's root directory."""
        return os.path.join(self.remote.root, self.ledger_file_name)
|
|
|
|
|
2019-04-26 18:27:27 +03:00
|
|
|
|
|
|
|
@contextmanager
def ccf_remote(
    lib_path,
    local_node_id,
    host,
    pubhost,
    node_port,
    rpc_port,
    args,
    remote_class,
    start_type=None,
    enclave_type=None,
    workspace=None,
    label=None,
    **ccf_kwargs,
):
    """
    Context Manager wrapper for CCFRemote

    The node is set up and started on entry and stopped on exit.

    CCFRemote additionally requires `start_type`, `enclave_type`, `workspace`
    and `label`; the previous eight-argument call could never construct one,
    so a TypeError naming the missing values is raised explicitly. Any extra
    keyword arguments are forwarded to CCFRemote.

    NOTE(review): `args` is kept for call compatibility but is not forwarded;
    which CCFRemote parameters it was meant to supply is not visible here.
    """
    required = {
        "start_type": start_type,
        "enclave_type": enclave_type,
        "workspace": workspace,
        "label": label,
    }
    missing = [key for key, value in required.items() if value is None]
    if missing:
        raise TypeError(
            "ccf_remote() missing arguments required by CCFRemote: "
            + ", ".join(missing)
        )
    remote = CCFRemote(
        start_type,
        lib_path,
        local_node_id,
        host,
        pubhost,
        node_port,
        rpc_port,
        remote_class,
        enclave_type,
        workspace,
        label,
        **ccf_kwargs,
    )
    try:
        remote.setup()
        remote.start()
        yield remote
    finally:
        remote.stop()
|
|
|
|
|
|
|
|
|
2019-08-28 12:57:45 +03:00
|
|
|
class StartType(Enum):
    """
    How a CCFRemote node is launched; selects the sub-command appended to
    the node's command line.
    """

    # "start" sub-command
    new = 0
    # "join" sub-command
    join = 1
    # "recover" sub-command
    recover = 2
|