зеркало из https://github.com/microsoft/CCF.git
Log execution metrics in `js_generic` (#5123)
This commit is contained in:
Родитель
33eac6c10a
Коммит
45b5cc16f7
|
@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
|
|||
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
||||
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added
|
||||
|
||||
- Added logging of JS execution time for all requests. This can be disabled in confidential scenarios with the new `ccf.enableMetricsLogging` function in the JS API. After calling `ccf.enableMetricsLogging(false)`, this logging will not be emitted.
|
||||
|
||||
## [4.0.0-dev5]
|
||||
|
||||
[4.0.0-dev5]: https://github.com/microsoft/CCF/releases/tag/ccf-4.0.0-dev5
|
||||
|
|
|
@ -130,6 +130,21 @@ In many places where timestamps are desired, they should come from the outside w
|
|||
|
||||
To ease porting of existing apps, and for logging scenarios, there is an option to retrieve the current time from the host. When the executing CCF node is run by an honest operator this will be kept up-to-date, but the accuracy of this is not covered by any attestation and as such these times should not be relied upon. To enable use of this untrusted time, call ``ccf.enableUntrustedDateTime(true)`` at any point in your application code, including at the global scope. After this is enabled, calls to ``Date.now()`` will retrieve the current time as specified by the untrusted host. This behaviour can also be revoked by a call to ``ccf.enableUntrustedDateTime(false)``, allowing the untrusted behaviour to be tightly scoped, and explicitly opted in to at each call point.
|
||||
|
||||
Execution metrics
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
By default the CCF JS runtime will print a log line for each completed JS request. This lists the request path and response status as well as how long the request took to execute. Each line also includes a ``[js]`` tag so that they can easily be filtered and sent to an automated monitoring system. These lines have the following format:
|
||||
|
||||
.. code-block::
|
||||
|
||||
<timestamp> [info ][js] <...> | JS execution complete: Method=GET, Path=/app/make_randoms, Status=200, ExecMilliseconds=30
|
||||
|
||||
These are designed to aid debugging, as a starting point for building operational metrics graphs.
|
||||
|
||||
.. note:: The execution time is only precise within a few milliseconds, and relies on the time provided by the untrusted host. It should be used for comparing the execution time between different requests, and as an approximation of real execution time, but will not help distinguish requests which complete in under 1ms.
|
||||
|
||||
Some applications may not wish to log this information to the untrusted host (for example, if the frequency of each request type is considered confidential). This logging can be disabled by a call to ``ccf.enableMetricsLogging(false)`` at any point in the application code. This could be at the global scope to disable this logging for all calls, or within a single request handler to selectively mute that request.
|
||||
|
||||
Deployment
|
||||
----------
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ CCF Documentation
|
|||
|
||||
.. image:: img/ts.svg
|
||||
:width: 32
|
||||
:target: ./build_apps/js_app.html
|
||||
:target: ./build_apps/js_app_ts.html
|
||||
:alt: TypeScript
|
||||
:align: left
|
||||
|
||||
|
|
|
@ -688,14 +688,27 @@ export interface CCF {
|
|||
|
||||
/**
|
||||
* Toggles implementation of Date global API between using untrusted host time
|
||||
* and returning 0 (default).
|
||||
* (when enabled)and returning 0 (when disabled). By default this is disabled.
|
||||
*
|
||||
* Returns previous value, allowing a global default to be maintained.
|
||||
* Returns the previous value, allowing a global default to be maintained.
|
||||
*
|
||||
* @param enable If true, then subsequent calls to Date.now() will return untrusted
|
||||
* host time
|
||||
*/
|
||||
enableUntrustedDateTime(enable: boolean): boolean;
|
||||
|
||||
/**
|
||||
* Toggles whether common metrics (including the path, the response status,
|
||||
* and the execution time) should be logged when this execution completes.
|
||||
* By default this is enabled. If the request pattern should not be visible
|
||||
* to an operator, then this behaviour should be disabled.
|
||||
*
|
||||
* Returns the previous value, allowing a global default to be maintained.
|
||||
*
|
||||
* @param enable If false, then no built-in logging will be emitted describing
|
||||
* this execution
|
||||
*/
|
||||
enableMetricsLogging(enable: boolean): boolean;
|
||||
}
|
||||
|
||||
export const openenclave: OpenEnclave = (<any>globalThis).openenclave;
|
||||
|
|
|
@ -536,6 +536,10 @@ class CCFPolyfill implements CCF {
|
|||
enableUntrustedDateTime(enable: boolean): boolean {
|
||||
throw new Error("Not implemented");
|
||||
}
|
||||
|
||||
enableMetricsLogging(enable: boolean): boolean {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
(<any>globalThis).ccf = new CCFPolyfill();
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "ccf/historical_queries_adapter.h"
|
||||
#include "ccf/node/host_processes_interface.h"
|
||||
#include "ccf/version.h"
|
||||
#include "enclave/enclave_time.h"
|
||||
#include "js/wrap.h"
|
||||
#include "kv/untyped_map.h"
|
||||
#include "named_auth_policies.h"
|
||||
|
@ -320,7 +321,7 @@ namespace ccfapp
|
|||
|
||||
if (JS_IsException(val))
|
||||
{
|
||||
bool time_out = ctx.host_time.request_timed_out;
|
||||
bool time_out = ctx.interrupt_data.request_timed_out;
|
||||
std::string error_msg = "Exception thrown while executing.";
|
||||
|
||||
js::js_dump_error(ctx);
|
||||
|
@ -422,6 +423,7 @@ namespace ccfapp
|
|||
endpoint_ctx.rpc_ctx->set_response_body(std::move(response_body));
|
||||
}
|
||||
}
|
||||
|
||||
// Response headers
|
||||
{
|
||||
auto response_headers_js = val["headers"];
|
||||
|
@ -457,8 +459,8 @@ namespace ccfapp
|
|||
}
|
||||
|
||||
// Response status code
|
||||
int response_status_code = HTTP_STATUS_OK;
|
||||
{
|
||||
int response_status_code = HTTP_STATUS_OK;
|
||||
auto status_code_js = ctx(JS_GetPropertyStr(ctx, val, "statusCode"));
|
||||
if (!JS_IsUndefined(status_code_js) && !JS_IsNull(status_code_js))
|
||||
{
|
||||
|
@ -475,6 +477,26 @@ namespace ccfapp
|
|||
endpoint_ctx.rpc_ctx->set_response_status(response_status_code);
|
||||
}
|
||||
|
||||
// Log execution metrics
|
||||
if (ctx.log_execution_metrics)
|
||||
{
|
||||
const auto time_now = ccf::get_enclave_time();
|
||||
// Although enclave time returns a microsecond value, the actual
|
||||
// precision/granularity depends on the host's TimeUpdater. By default
|
||||
// this only advances each millisecond. Avoid implying more precision
|
||||
// than that, by rounding to milliseconds
|
||||
const auto exec_time =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
time_now - ctx.interrupt_data.start_time);
|
||||
CCF_LOG_FMT(INFO, "js")
|
||||
("JS execution complete: Method={}, Path={}, Status={}, "
|
||||
"ExecMilliseconds={}",
|
||||
endpoint->dispatch.verb.c_str(),
|
||||
endpoint->full_uri_path,
|
||||
response_status_code,
|
||||
exec_time.count());
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -58,7 +58,6 @@ namespace ccf::js
|
|||
JSClassDef historical_class_def = {};
|
||||
JSClassDef historical_state_class_def = {};
|
||||
|
||||
std::chrono::milliseconds execution_time = default_max_execution_time;
|
||||
std::vector<FFIPlugin> ffi_plugins;
|
||||
|
||||
static void register_ffi_plugin(const FFIPlugin& plugin)
|
||||
|
@ -85,15 +84,15 @@ namespace ccf::js
|
|||
|
||||
static int js_custom_interrupt_handler(JSRuntime* rt, void* opaque)
|
||||
{
|
||||
UntrustedHostTime* time = reinterpret_cast<UntrustedHostTime*>(opaque);
|
||||
InterruptData* inter = reinterpret_cast<InterruptData*>(opaque);
|
||||
auto now = ccf::get_enclave_time();
|
||||
auto elapsed_time = now - time->start_time;
|
||||
auto elapsed_time = now - inter->start_time;
|
||||
auto elapsed_ms =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_time);
|
||||
if (elapsed_ms.count() >= time->max_execution_time.count())
|
||||
if (elapsed_ms.count() >= inter->max_execution_time.count())
|
||||
{
|
||||
LOG_INFO_FMT("JS execution has timed out after {}ms", elapsed_ms.count());
|
||||
time->request_timed_out = true;
|
||||
inter->request_timed_out = true;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
|
@ -105,6 +104,9 @@ namespace ccf::js
|
|||
JSWrappedValue Context::call(
|
||||
const JSWrappedValue& f, const std::vector<js::JSWrappedValue>& argv)
|
||||
{
|
||||
auto rt = JS_GetRuntime(ctx);
|
||||
js::Runtime& jsrt = *(js::Runtime*)JS_GetRuntimeOpaque(rt);
|
||||
|
||||
std::vector<JSValue> argvn;
|
||||
argvn.reserve(argv.size());
|
||||
for (auto& a : argv)
|
||||
|
@ -112,10 +114,9 @@ namespace ccf::js
|
|||
argvn.push_back(a.val);
|
||||
}
|
||||
const auto curr_time = ccf::get_enclave_time();
|
||||
host_time.start_time = curr_time;
|
||||
host_time.max_execution_time = execution_time;
|
||||
JS_SetInterruptHandler(
|
||||
JS_GetRuntime(ctx), js_custom_interrupt_handler, &host_time);
|
||||
interrupt_data.start_time = curr_time;
|
||||
interrupt_data.max_execution_time = jsrt.get_max_exec_time();
|
||||
JS_SetInterruptHandler(rt, js_custom_interrupt_handler, &interrupt_data);
|
||||
|
||||
return W(JS_Call(ctx, f, JS_UNDEFINED, argv.size(), argvn.data()));
|
||||
}
|
||||
|
@ -127,6 +128,9 @@ namespace ccf::js
|
|||
{
|
||||
throw std::runtime_error("Failed to initialise QuickJS runtime");
|
||||
}
|
||||
|
||||
JS_SetRuntimeOpaque(rt, this);
|
||||
|
||||
size_t stack_size = default_stack_size;
|
||||
size_t heap_size = default_heap_size;
|
||||
|
||||
|
@ -137,7 +141,7 @@ namespace ccf::js
|
|||
{
|
||||
heap_size = js_runtime_options.value().max_heap_bytes;
|
||||
stack_size = js_runtime_options.value().max_stack_bytes;
|
||||
execution_time = std::chrono::milliseconds{
|
||||
max_exec_time = std::chrono::milliseconds{
|
||||
js_runtime_options.value().max_execution_time_ms};
|
||||
}
|
||||
|
||||
|
@ -1706,6 +1710,28 @@ namespace ccf::js
|
|||
return JS_NewBool(ctx, previous);
|
||||
}
|
||||
|
||||
static JSValue js_enable_metrics_logging(
|
||||
JSContext* ctx, JSValueConst, int argc, JSValueConst* argv)
|
||||
{
|
||||
if (argc != 1)
|
||||
{
|
||||
return JS_ThrowTypeError(
|
||||
ctx, "Passed %d arguments, but expected 1", argc);
|
||||
}
|
||||
|
||||
const auto v = argv[0];
|
||||
if (!JS_IsBool(v))
|
||||
{
|
||||
return JS_ThrowTypeError(ctx, "First argument must be a boolean");
|
||||
}
|
||||
|
||||
js::Context* jsctx = (js::Context*)JS_GetContextOpaque(ctx);
|
||||
const auto previous = jsctx->log_execution_metrics;
|
||||
jsctx->log_execution_metrics = JS_ToBool(ctx, v);
|
||||
|
||||
return JS_NewBool(ctx, previous);
|
||||
}
|
||||
|
||||
JSWrappedValue Context::default_function(
|
||||
const std::string& code, const std::string& path)
|
||||
|
||||
|
@ -1843,6 +1869,13 @@ namespace ccf::js
|
|||
JS_NewCFunction(
|
||||
ctx, js_enable_untrusted_date_time, "enableUntrustedDateTime", 1));
|
||||
|
||||
JS_SetPropertyStr(
|
||||
ctx,
|
||||
ccf,
|
||||
"enableMetricsLogging",
|
||||
JS_NewCFunction(
|
||||
ctx, js_enable_metrics_logging, "enableMetricsLogging", 1));
|
||||
|
||||
/* Moved to ccf.crypto namespace and now deprecated. Can be removed in 4.x
|
||||
*/
|
||||
JS_SetPropertyStr(
|
||||
|
|
|
@ -228,7 +228,7 @@ namespace ccf::js
|
|||
JSWrappedValue load_app_module(
|
||||
JSContext* ctx, const char* module_name, kv::Tx* tx);
|
||||
|
||||
struct UntrustedHostTime
|
||||
struct InterruptData
|
||||
{
|
||||
std::chrono::microseconds start_time;
|
||||
std::chrono::milliseconds max_execution_time;
|
||||
|
@ -239,6 +239,8 @@ namespace ccf::js
|
|||
{
|
||||
JSRuntime* rt = nullptr;
|
||||
|
||||
std::chrono::milliseconds max_exec_time = default_max_execution_time;
|
||||
|
||||
public:
|
||||
Runtime(kv::Tx* tx);
|
||||
~Runtime();
|
||||
|
@ -249,6 +251,11 @@ namespace ccf::js
|
|||
}
|
||||
|
||||
void add_ccf_classdefs();
|
||||
|
||||
std::chrono::milliseconds get_max_exec_time() const
|
||||
{
|
||||
return max_exec_time;
|
||||
}
|
||||
};
|
||||
|
||||
class Context
|
||||
|
@ -257,8 +264,9 @@ namespace ccf::js
|
|||
|
||||
public:
|
||||
const TxAccess access;
|
||||
UntrustedHostTime host_time;
|
||||
InterruptData interrupt_data;
|
||||
bool implement_untrusted_time = false;
|
||||
bool log_execution_metrics = true;
|
||||
|
||||
Context(JSRuntime* rt, TxAccess acc) : access(acc)
|
||||
{
|
||||
|
|
|
@ -172,7 +172,7 @@ namespace ccf
|
|||
|
||||
auto [reason, trace] = js::js_error_message(context);
|
||||
|
||||
if (context.host_time.request_timed_out)
|
||||
if (context.interrupt_data.request_timed_out)
|
||||
{
|
||||
reason = "Operation took too long to complete.";
|
||||
}
|
||||
|
@ -230,7 +230,7 @@ namespace ccf
|
|||
{
|
||||
pi_.value().state = ProposalState::FAILED;
|
||||
auto [reason, trace] = js::js_error_message(js_context);
|
||||
if (js_context.host_time.request_timed_out)
|
||||
if (js_context.interrupt_data.request_timed_out)
|
||||
{
|
||||
reason = "Operation took too long to complete.";
|
||||
}
|
||||
|
@ -330,7 +330,7 @@ namespace ccf
|
|||
{
|
||||
pi_.value().state = ProposalState::FAILED;
|
||||
auto [reason, trace] = js::js_error_message(js_context);
|
||||
if (js_context.host_time.request_timed_out)
|
||||
if (js_context.interrupt_data.request_timed_out)
|
||||
{
|
||||
reason = "Operation took too long to complete.";
|
||||
}
|
||||
|
@ -1179,7 +1179,7 @@ namespace ccf
|
|||
if (JS_IsException(val))
|
||||
{
|
||||
auto [reason, trace] = js_error_message(context);
|
||||
if (context.host_time.request_timed_out)
|
||||
if (context.interrupt_data.request_timed_out)
|
||||
{
|
||||
reason = "Operation took too long to complete.";
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import functools
|
|||
from datetime import datetime, timedelta
|
||||
from infra.consortium import slurp_file
|
||||
from infra.snp import IS_SNP
|
||||
from collections import deque
|
||||
|
||||
|
||||
from loguru import logger as LOG
|
||||
|
@ -97,6 +98,66 @@ class UserInfo:
|
|||
key_path: str
|
||||
|
||||
|
||||
DEFAULT_TAIL_LINES_LEN = 10
|
||||
|
||||
|
||||
def log_errors(
|
||||
out_path,
|
||||
err_path,
|
||||
tail_lines_len=DEFAULT_TAIL_LINES_LEN,
|
||||
ignore_error_patterns=None,
|
||||
):
|
||||
error_filter = ["[fail ]", "[fatal]", "Atom leak", "atom leakage"]
|
||||
error_lines = []
|
||||
try:
|
||||
tail_lines = deque(maxlen=tail_lines_len)
|
||||
with open(out_path, "r", errors="replace", encoding="utf-8") as lines:
|
||||
for line_no, line in enumerate(lines):
|
||||
stripped_line = line.rstrip()
|
||||
tail_lines.append(stripped_line)
|
||||
if any(x in stripped_line for x in error_filter):
|
||||
ignore = False
|
||||
if ignore_error_patterns is not None:
|
||||
for pattern in ignore_error_patterns:
|
||||
if pattern in stripped_line:
|
||||
ignore = True
|
||||
break
|
||||
if not ignore:
|
||||
LOG.error(f"{out_path}:{line_no+1}: {stripped_line}")
|
||||
error_lines.append(stripped_line)
|
||||
if error_lines:
|
||||
LOG.info(
|
||||
"{} errors found, printing end of output for context:", len(error_lines)
|
||||
)
|
||||
for line in tail_lines:
|
||||
LOG.info(line)
|
||||
except IOError:
|
||||
LOG.exception("Could not check output {} for errors".format(out_path))
|
||||
|
||||
fatal_error_lines = []
|
||||
try:
|
||||
with open(err_path, "r", errors="replace", encoding="utf-8") as lines:
|
||||
fatal_error_lines = [
|
||||
line
|
||||
for line in lines.readlines()
|
||||
if not line.startswith("[get_qpl_handle ")
|
||||
]
|
||||
if fatal_error_lines:
|
||||
LOG.error(f"Contents of {err_path}:\n{''.join(fatal_error_lines)}")
|
||||
except IOError:
|
||||
LOG.exception("Could not read err output {}".format(err_path))
|
||||
|
||||
# See https://github.com/microsoft/CCF/issues/1701
|
||||
ignore_fatal_errors = False
|
||||
for line in fatal_error_lines:
|
||||
if line.startswith("Tracer caught signal 11"):
|
||||
ignore_fatal_errors = True
|
||||
if ignore_fatal_errors:
|
||||
fatal_error_lines = []
|
||||
|
||||
return error_lines, fatal_error_lines
|
||||
|
||||
|
||||
class Network:
|
||||
KEY_GEN = "keygenerator.sh"
|
||||
SHARE_SCRIPT = "submit_recovery_share.sh"
|
||||
|
@ -719,9 +780,14 @@ class Network:
|
|||
|
||||
node_errors = {}
|
||||
for node in self.nodes:
|
||||
_, fatal_errors = node.stop(
|
||||
ignore_error_patterns=self.ignore_error_patterns
|
||||
)
|
||||
node.stop()
|
||||
out_path, err_path = node.get_logs()
|
||||
if out_path is not None and err_path is not None:
|
||||
_, fatal_errors = log_errors(
|
||||
out_path, err_path, ignore_error_patterns=self.ignore_error_patterns
|
||||
)
|
||||
else:
|
||||
fatal_errors = []
|
||||
node_errors[node.local_node_id] = fatal_errors
|
||||
if fatal_errors:
|
||||
fatal_error_found = True
|
||||
|
@ -770,7 +836,12 @@ class Network:
|
|||
LOG.error(f"New pending node {node.node_id} failed to join the network")
|
||||
if stop_on_error:
|
||||
assert node.remote.check_done()
|
||||
errors, _ = node.stop()
|
||||
node.stop()
|
||||
out_path, err_path = node.get_logs()
|
||||
if out_path is not None and err_path is not None:
|
||||
errors, _ = log_errors(out_path, err_path)
|
||||
else:
|
||||
errors = []
|
||||
self.nodes.remove(node)
|
||||
if errors:
|
||||
# Throw accurate exceptions if known errors found in
|
||||
|
|
|
@ -333,7 +333,6 @@ class Node:
|
|||
break
|
||||
except Exception as e:
|
||||
if self.remote.check_done():
|
||||
self.remote.get_logs(tail_lines_len=None)
|
||||
raise RuntimeError(
|
||||
f"Error starting node {self.local_node_id}"
|
||||
) from e
|
||||
|
@ -444,14 +443,18 @@ class Node:
|
|||
# In the infra, public RPC port is always the same as local RPC port
|
||||
rpc_interface.public_port = rpc_interface.port
|
||||
|
||||
def stop(self, *args, **kwargs):
|
||||
def stop(self):
|
||||
if self.remote and self.network_state is not NodeNetworkState.stopped:
|
||||
if self.suspended:
|
||||
self.resume()
|
||||
self.network_state = NodeNetworkState.stopped
|
||||
LOG.info(f"Stopping node {self.local_node_id}")
|
||||
return self.remote.stop(*args, **kwargs)
|
||||
return [], []
|
||||
self.remote.stop()
|
||||
|
||||
def get_logs(self):
|
||||
if self.remote is not None:
|
||||
return self.remote.get_logs()
|
||||
return None, None
|
||||
|
||||
def sigterm(self):
|
||||
self.remote.sigterm()
|
||||
|
|
|
@ -12,7 +12,6 @@ import signal
|
|||
import re
|
||||
import stat
|
||||
import shutil
|
||||
from collections import deque
|
||||
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
||||
import json
|
||||
import infra.snp as snp
|
||||
|
@ -55,66 +54,6 @@ def sftp_session(hostname):
|
|||
client.close()
|
||||
|
||||
|
||||
DEFAULT_TAIL_LINES_LEN = 10
|
||||
|
||||
|
||||
def log_errors(
|
||||
out_path,
|
||||
err_path,
|
||||
tail_lines_len=DEFAULT_TAIL_LINES_LEN,
|
||||
ignore_error_patterns=None,
|
||||
):
|
||||
error_filter = ["[fail ]", "[fatal]", "Atom leak", "atom leakage"]
|
||||
error_lines = []
|
||||
try:
|
||||
tail_lines = deque(maxlen=tail_lines_len)
|
||||
with open(out_path, "r", errors="replace", encoding="utf-8") as lines:
|
||||
for line_no, line in enumerate(lines):
|
||||
stripped_line = line.rstrip()
|
||||
tail_lines.append(stripped_line)
|
||||
if any(x in stripped_line for x in error_filter):
|
||||
ignore = False
|
||||
if ignore_error_patterns is not None:
|
||||
for pattern in ignore_error_patterns:
|
||||
if pattern in stripped_line:
|
||||
ignore = True
|
||||
break
|
||||
if not ignore:
|
||||
LOG.error(f"{out_path}:{line_no+1}: {stripped_line}")
|
||||
error_lines.append(stripped_line)
|
||||
if error_lines:
|
||||
LOG.info(
|
||||
"{} errors found, printing end of output for context:", len(error_lines)
|
||||
)
|
||||
for line in tail_lines:
|
||||
LOG.info(line)
|
||||
except IOError:
|
||||
LOG.exception("Could not check output {} for errors".format(out_path))
|
||||
|
||||
fatal_error_lines = []
|
||||
try:
|
||||
with open(err_path, "r", errors="replace", encoding="utf-8") as lines:
|
||||
fatal_error_lines = [
|
||||
line
|
||||
for line in lines.readlines()
|
||||
if not line.startswith("[get_qpl_handle ")
|
||||
]
|
||||
if fatal_error_lines:
|
||||
LOG.error(f"Contents of {err_path}:\n{''.join(fatal_error_lines)}")
|
||||
except IOError:
|
||||
LOG.exception("Could not read err output {}".format(err_path))
|
||||
|
||||
# See https://github.com/microsoft/CCF/issues/1701
|
||||
ignore_fatal_errors = False
|
||||
for line in fatal_error_lines:
|
||||
if line.startswith("Tracer caught signal 11"):
|
||||
ignore_fatal_errors = True
|
||||
if ignore_fatal_errors:
|
||||
fatal_error_lines = []
|
||||
|
||||
return error_lines, fatal_error_lines
|
||||
|
||||
|
||||
class CmdMixin(object):
|
||||
def set_perf(self):
|
||||
self.cmd = [
|
||||
|
@ -285,9 +224,7 @@ class SSHRemote(CmdMixin):
|
|||
raise ValueError(self.root)
|
||||
return files
|
||||
|
||||
def get_logs(
|
||||
self, tail_lines_len=DEFAULT_TAIL_LINES_LEN, ignore_error_patterns=None
|
||||
):
|
||||
def get_logs(self):
|
||||
with sftp_session(self.hostname) as session:
|
||||
for filepath in (self.err, self.out):
|
||||
try:
|
||||
|
@ -303,12 +240,9 @@ class SSHRemote(CmdMixin):
|
|||
filepath, dst_path, self.hostname
|
||||
)
|
||||
)
|
||||
return log_errors(
|
||||
os.path.join(self.common_dir, "{}_{}_out".format(self.hostname, self.name)),
|
||||
os.path.join(self.common_dir, "{}_{}_err".format(self.hostname, self.name)),
|
||||
tail_lines_len=tail_lines_len,
|
||||
ignore_error_patterns=ignore_error_patterns,
|
||||
)
|
||||
return os.path.join(
|
||||
self.common_dir, "{}_{}_out".format(self.hostname, self.name)
|
||||
), os.path.join(self.common_dir, "{}_{}_err".format(self.hostname, self.name))
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
|
@ -353,18 +287,13 @@ class SSHRemote(CmdMixin):
|
|||
if stdout.channel.recv_exit_status() != 0:
|
||||
raise RuntimeError(f"Remote {self.name} could not deliver SIGTERM")
|
||||
|
||||
def stop(self, ignore_error_patterns=None):
|
||||
def stop(self):
|
||||
"""
|
||||
Disconnect the client, and therefore shut down the command as well.
|
||||
"""
|
||||
LOG.info("[{}] closing".format(self.hostname))
|
||||
(
|
||||
errors,
|
||||
fatal_errors,
|
||||
) = self.get_logs(ignore_error_patterns=ignore_error_patterns)
|
||||
self.client.close()
|
||||
self.proc_client.close()
|
||||
return errors, fatal_errors
|
||||
|
||||
def setup(self, **kwargs):
|
||||
"""
|
||||
|
@ -512,15 +441,8 @@ class LocalRemote(CmdMixin):
|
|||
def resume(self):
|
||||
self.proc.send_signal(signal.SIGCONT)
|
||||
|
||||
def get_logs(
|
||||
self, tail_lines_len=DEFAULT_TAIL_LINES_LEN, ignore_error_patterns=None
|
||||
):
|
||||
return log_errors(
|
||||
self.out,
|
||||
self.err,
|
||||
tail_lines_len=tail_lines_len,
|
||||
ignore_error_patterns=ignore_error_patterns,
|
||||
)
|
||||
def get_logs(self):
|
||||
return self.out, self.err
|
||||
|
||||
def _print_stack_trace(self):
|
||||
if shutil.which("lldb") != "":
|
||||
|
@ -564,7 +486,7 @@ class LocalRemote(CmdMixin):
|
|||
def sigterm(self):
|
||||
self.proc.terminate()
|
||||
|
||||
def stop(self, ignore_error_patterns=None):
|
||||
def stop(self):
|
||||
"""
|
||||
Disconnect the client, and therefore shut down the command as well.
|
||||
"""
|
||||
|
@ -589,7 +511,6 @@ class LocalRemote(CmdMixin):
|
|||
self.stdout.close()
|
||||
if self.stderr:
|
||||
self.stderr.close()
|
||||
return self.get_logs(ignore_error_patterns=ignore_error_patterns)
|
||||
|
||||
def setup(self, use_links=True):
|
||||
"""
|
||||
|
@ -1101,13 +1022,11 @@ class CCFRemote(object):
|
|||
def sigterm(self):
|
||||
self.remote.sigterm()
|
||||
|
||||
def stop(self, *args, **kwargs):
|
||||
errors, fatal_errors = [], []
|
||||
def stop(self):
|
||||
try:
|
||||
errors, fatal_errors = self.remote.stop(*args, **kwargs)
|
||||
self.remote.stop()
|
||||
except Exception:
|
||||
LOG.exception("Failed to shut down {} cleanly".format(self.local_node_id))
|
||||
return errors, fatal_errors
|
||||
|
||||
def check_done(self):
|
||||
return self.remote.check_done()
|
||||
|
@ -1181,12 +1100,8 @@ class CCFRemote(object):
|
|||
paths += [os.path.join(self.remote.root, read_only_ledger_dir_name)]
|
||||
return paths
|
||||
|
||||
def get_logs(
|
||||
self, tail_lines_len=DEFAULT_TAIL_LINES_LEN, ignore_error_patterns=None
|
||||
):
|
||||
return self.remote.get_logs(
|
||||
tail_lines_len=tail_lines_len, ignore_error_patterns=ignore_error_patterns
|
||||
)
|
||||
def get_logs(self):
|
||||
return self.remote.get_logs()
|
||||
|
||||
def get_host(self):
|
||||
return self.pub_host
|
||||
|
|
|
@ -196,13 +196,15 @@ class DockerShim(infra.remote.CCFRemote):
|
|||
def get_host(self):
|
||||
return self.container_ip
|
||||
|
||||
def stop(self, *args, **kwargs):
|
||||
def stop(self):
|
||||
try:
|
||||
self.container.stop()
|
||||
LOG.info(f"Stopped container {self.container.name}")
|
||||
except docker.errors.NotFound:
|
||||
pass
|
||||
return self.remote.get_logs(*args, **kwargs)
|
||||
|
||||
def get_logs(self):
|
||||
return self.remote.get_logs()
|
||||
|
||||
def suspend(self):
|
||||
self.container.pause()
|
||||
|
|
|
@ -52,6 +52,16 @@
|
|||
"openapi": {}
|
||||
}
|
||||
},
|
||||
"/fibonacci/{n}": {
|
||||
"get": {
|
||||
"js_module": "endpoints.js",
|
||||
"js_function": "fibonacci",
|
||||
"forwarding_required": "never",
|
||||
"authn_policies": ["no_auth"],
|
||||
"mode": "readonly",
|
||||
"openapi": {}
|
||||
}
|
||||
},
|
||||
"/make_randoms": {
|
||||
"get": {
|
||||
"js_module": "endpoints.js",
|
||||
|
|
|
@ -11,6 +11,22 @@ export function make_randoms(request) {
|
|||
};
|
||||
}
|
||||
|
||||
function slow_fibonacci(n) {
|
||||
return n < 1 ? 0 : n <= 2 ? 1 : slow_fibonacci(n - 1) + slow_fibonacci(n - 2);
|
||||
}
|
||||
|
||||
export function fibonacci(request) {
|
||||
const params = request.params;
|
||||
const n = Number(params.n);
|
||||
const fib = slow_fibonacci(n);
|
||||
return {
|
||||
body: {
|
||||
n: n,
|
||||
fib: fib,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
ccf.enableUntrustedDateTime(true);
|
||||
|
||||
export function time_now(request) {
|
||||
|
|
|
@ -15,6 +15,7 @@ import json
|
|||
import time
|
||||
import infra.jwt_issuer
|
||||
import datetime
|
||||
import re
|
||||
from e2e_logging import test_multi_auth
|
||||
from http import HTTPStatus
|
||||
|
||||
|
@ -588,6 +589,62 @@ def test_datetime_api(network, args):
|
|||
local_epoch_start = datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
|
||||
service_epoch_start = datetime.datetime.fromisoformat(definitely_1970)
|
||||
assert local_epoch_start == service_epoch_start, service_epoch_start
|
||||
return network
|
||||
|
||||
|
||||
@reqs.description("Test metrics logging")
|
||||
def test_metrics_logging(network, args):
|
||||
primary, _ = network.find_nodes()
|
||||
|
||||
# Add and test on a new node, so we can kill it to safely read its logs
|
||||
new_node = network.create_node("local://localhost")
|
||||
network.join_node(
|
||||
new_node,
|
||||
args.package,
|
||||
args,
|
||||
)
|
||||
network.trust_node(new_node, args)
|
||||
|
||||
# Submit several requests
|
||||
assertions = []
|
||||
with new_node.client() as c:
|
||||
c.get("/app/echo")
|
||||
assertions.append({"Method": "GET", "Path": "/app/echo"})
|
||||
c.post("/app/echo")
|
||||
assertions.append({"Method": "POST", "Path": "/app/echo"})
|
||||
c.get("/app/echo/hello")
|
||||
assertions.append({"Method": "GET", "Path": "/app/echo/{foo}"})
|
||||
c.post("/app/echo/bar")
|
||||
assertions.append({"Method": "POST", "Path": "/app/echo/{foo}"})
|
||||
c.get("/app/fibonacci/10")
|
||||
assertions.append({"Method": "GET", "Path": "/app/fibonacci/{n}"})
|
||||
c.get("/app/fibonacci/20")
|
||||
assertions.append({"Method": "GET", "Path": "/app/fibonacci/{n}"})
|
||||
c.get("/app/fibonacci/30")
|
||||
assertions.append({"Method": "GET", "Path": "/app/fibonacci/{n}"})
|
||||
|
||||
# Remove node
|
||||
network.retire_node(primary, new_node)
|
||||
new_node.stop()
|
||||
|
||||
# Read node's logs
|
||||
metrics_regex = re.compile(
|
||||
r".*\[js\].*\| JS execution complete: Method=(?P<Method>.*), Path=(?P<Path>.*), Status=(?P<Status>\d+), ExecMilliseconds=(?P<ExecMilliseconds>\d+)$"
|
||||
)
|
||||
out_path, _ = new_node.get_logs()
|
||||
for line in open(out_path, "r", encoding="utf-8").readlines():
|
||||
match = metrics_regex.match(line)
|
||||
if match is not None:
|
||||
expected_groups = assertions.pop(0)
|
||||
for k, v in expected_groups.items():
|
||||
actual_match = match.group(k)
|
||||
assert actual_match == v
|
||||
LOG.success(f"Found metrics logging line: {line}")
|
||||
LOG.info(f"Parsed to: {match.groups()}")
|
||||
|
||||
assert len(assertions) == 0
|
||||
|
||||
return network
|
||||
|
||||
|
||||
def run_api(args):
|
||||
|
@ -599,6 +656,7 @@ def run_api(args):
|
|||
network.start_and_open(args)
|
||||
network = test_request_object_api(network, args)
|
||||
network = test_datetime_api(network, args)
|
||||
network = test_metrics_logging(network, args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Загрузка…
Ссылка в новой задаче