Improve mem metrics, clean up process tree code
This commit is contained in:
Родитель
3ac03d57d1
Коммит
e82bb64a17
|
@ -91,15 +91,19 @@ bool ProcMetrics::collect_metrics() {
|
|||
Logger::Error("Failed to parse /proc/self/statm");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
Logger::Error("Failed to read /proc/self/statm: No contents!");
|
||||
return false;
|
||||
}
|
||||
} catch (std::exception& ex) {
|
||||
Logger::Error("Failed to read /proc/self/statm: %s", ex.what());
|
||||
return false;
|
||||
}
|
||||
|
||||
auto mem_pct = (static_cast<double>(resident*_page_size)/static_cast<double>(_total_system_memory))*100.0;
|
||||
uint64_t rss = resident*_page_size;
|
||||
uint64_t virt = total*_page_size;
|
||||
auto rss_mem_pct = (static_cast<double>(rss)/static_cast<double>(_total_system_memory))*100.0;
|
||||
auto virt_mem_pct = (static_cast<double>(virt)/static_cast<double>(_total_system_memory))*100.0;
|
||||
|
||||
if (rss > _rss_limit) {
|
||||
Logger::Error("RSS Limit (%ld) exceeded (%ld)", _rss_limit, rss);
|
||||
|
@ -107,13 +111,25 @@ bool ProcMetrics::collect_metrics() {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (rss_mem_pct > _rss_pct_limit) {
|
||||
Logger::Error("RSS %%MEM Limit (%lf) exceeded (%lf)", _rss_pct_limit, rss_mem_pct);
|
||||
_limit_fn();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (virt > _virt_limit) {
|
||||
Logger::Error("Virt Limit (%ld) exceeded (%ld)", _virt_limit, virt);
|
||||
_limit_fn();
|
||||
return false;
|
||||
}
|
||||
|
||||
_mem_pct_metric->Update(mem_pct);
|
||||
if (virt_mem_pct > _virt_pct_limit) {
|
||||
Logger::Error("Virt %%MEM Limit (%lf) exceeded (%lf)", _virt_pct_limit, virt_mem_pct);
|
||||
_limit_fn();
|
||||
return false;
|
||||
}
|
||||
|
||||
_mem_pct_metric->Update(rss_mem_pct);
|
||||
_rss_metric->Update(static_cast<double>(rss));
|
||||
_virt_metric->Update(static_cast<double>(virt));
|
||||
|
||||
|
@ -124,13 +140,17 @@ bool ProcMetrics::collect_metrics() {
|
|||
|
||||
for (auto& line: lines) {
|
||||
if (starts_with(line, "read_bytes: ")) {
|
||||
if (std::sscanf(lines[0].c_str(), "read_bytes: %lu", &read_bytes) != 1) {
|
||||
Logger::Error("Failed to parse /proc/self/io");
|
||||
try {
|
||||
read_bytes = std::stoul(line.substr(12));
|
||||
} catch (std::exception& ex) {
|
||||
Logger::Error("Failed to parse read_bytes in /proc/self/io: %s", ex.what());
|
||||
return false;
|
||||
}
|
||||
} else if (starts_with(line, "write_bytes: ")) {
|
||||
if (std::sscanf(lines[0].c_str(), "write_bytes: %lu", &write_bytes) != 1) {
|
||||
Logger::Error("Failed to parse /proc/self/io");
|
||||
try {
|
||||
write_bytes = std::stoul(line.substr(13));
|
||||
} catch (std::exception& ex) {
|
||||
Logger::Error("Failed to parse write_bytes in /proc/self/io: %s", ex.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,8 +54,8 @@ private:
|
|||
|
||||
class ProcMetrics: public RunBase {
|
||||
public:
|
||||
ProcMetrics(const std::string& nsname, const std::shared_ptr<PriorityQueue>& queue, const std::shared_ptr<Metrics>& metrics, uint64_t rss_limit, uint64_t virt_limit, std::function<void()> limit_fn)
|
||||
: _queue(queue), _metrics(metrics), _rss_limit(rss_limit), _virt_limit(virt_limit), _limit_fn(std::move(limit_fn)), _total_system_memory(0), _page_size(0), _clock(0),
|
||||
ProcMetrics(const std::string& nsname, const std::shared_ptr<PriorityQueue>& queue, const std::shared_ptr<Metrics>& metrics, uint64_t rss_limit, uint64_t virt_limit, double rss_pct_limit, double virt_pct_limit, std::function<void()> limit_fn)
|
||||
: _queue(queue), _metrics(metrics), _rss_limit(rss_limit), _virt_limit(virt_limit), _rss_pct_limit(rss_pct_limit), _virt_pct_limit(virt_pct_limit), _limit_fn(std::move(limit_fn)), _total_system_memory(0), _page_size(0), _clock(0),
|
||||
_queue_total_metrics(metrics, nsname, "queue.total.")
|
||||
{
|
||||
_cpu_metric = _metrics->AddMetric(MetricType::METRIC_BY_FILL, nsname, "%cpu", MetricPeriod::SECOND, MetricPeriod::HOUR);
|
||||
|
@ -84,6 +84,8 @@ private:
|
|||
std::shared_ptr<Metrics> _metrics;
|
||||
uint64_t _rss_limit;
|
||||
uint64_t _virt_limit;
|
||||
double _rss_pct_limit;
|
||||
double _virt_pct_limit;
|
||||
std::function<void()> _limit_fn;
|
||||
uint64_t _total_system_memory;
|
||||
long _page_size;
|
||||
|
|
|
@ -265,15 +265,11 @@ void ProcessTree::AddPid(int pid)
|
|||
/* Process event from AuditD (execve)
|
||||
Make a new entry (deleting any existing entry).
|
||||
*/
|
||||
std::shared_ptr<ProcessTreeItem> ProcessTree::AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, std::string exe, const std::string &cmdline)
|
||||
std::shared_ptr<ProcessTreeItem> ProcessTree::AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, const std::string& exe, const std::string &cmdline)
|
||||
{
|
||||
std::unique_lock<std::mutex> process_write_lock(_process_write_mutex);
|
||||
std::shared_ptr<ProcessTreeItem> process;
|
||||
|
||||
if (exe[0] == '"' && exe.back() == '"') {
|
||||
exe = exe.substr(1, exe.length() - 2);
|
||||
}
|
||||
|
||||
std::string containerid = ExtractContainerId(exe, cmdline);
|
||||
auto it = _processes.find(pid);
|
||||
if (it != _processes.end()) {
|
||||
|
@ -419,18 +415,7 @@ std::shared_ptr<ProcessTreeItem> ProcessTree::GetInfoForPid(int pid)
|
|||
}
|
||||
}
|
||||
|
||||
bool ProcessTree::is_number(char *s)
|
||||
{
|
||||
for (char *t=s; *t != 0; t++) {
|
||||
if (!isdigit(*t)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ProcessTree::ApplyFlags(std::shared_ptr<ProcessTreeItem> process)
|
||||
void ProcessTree::ApplyFlags(const std::shared_ptr<ProcessTreeItem>& process)
|
||||
{
|
||||
unsigned int height = 0;
|
||||
process->_flags = _filtersEngine->GetFlags(process, height);
|
||||
|
@ -476,7 +461,7 @@ void ProcessTree::PopulateTree()
|
|||
_processes[pid] = process;
|
||||
}
|
||||
|
||||
for (auto p : _processes) {
|
||||
for (auto& p : _processes) {
|
||||
auto process = p.second;
|
||||
auto it = _processes.find(process->_ppid);
|
||||
if (it != _processes.end()) {
|
||||
|
@ -484,7 +469,7 @@ void ProcessTree::PopulateTree()
|
|||
}
|
||||
}
|
||||
|
||||
for (auto p : _processes) {
|
||||
for (auto& p : _processes) {
|
||||
std::shared_ptr<ProcessTreeItem> process, parent;
|
||||
process = p.second;
|
||||
auto it = _processes.find(process->_ppid);
|
||||
|
@ -504,7 +489,7 @@ void ProcessTree::PopulateTree()
|
|||
}
|
||||
}
|
||||
// Populate containerid
|
||||
for (auto p : _processes) {
|
||||
for (auto& p : _processes) {
|
||||
auto process = p.second;
|
||||
if( !(process->_containeridfromhostprocess).empty()) {
|
||||
SetContainerId(process, process->_containeridfromhostprocess);
|
||||
|
@ -515,7 +500,7 @@ void ProcessTree::PopulateTree()
|
|||
void ProcessTree::UpdateFlags() {
|
||||
std::unique_lock<std::mutex> process_write_lock(_process_write_mutex);
|
||||
|
||||
for (auto p : _processes) {
|
||||
for (auto& p : _processes) {
|
||||
ApplyFlags(p.second);
|
||||
}
|
||||
}
|
||||
|
@ -523,7 +508,7 @@ void ProcessTree::UpdateFlags() {
|
|||
// This utility method gets called only during the initial population of ProcessTree when a containerid shim process is identified with a non-empty value of _containeridfromhostprocess.
|
||||
// All of its children are assigned the ContainerId value recursively.
|
||||
// ContainerId is not set for the containerid shim process.
|
||||
void ProcessTree::SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string containerid)
|
||||
void ProcessTree::SetContainerId(const std::shared_ptr<ProcessTreeItem>& p, const std::string& containerid)
|
||||
{
|
||||
for (auto c : p->_children) {
|
||||
auto it2 = _processes.find(c);
|
||||
|
@ -535,7 +520,7 @@ void ProcessTree::SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string
|
|||
}
|
||||
}
|
||||
|
||||
std::string ProcessTree::ExtractContainerId(std::string exe, const std::string& cmdline)
|
||||
std::string ProcessTree::ExtractContainerId(const std::string& exe, const std::string& cmdline)
|
||||
{
|
||||
// cmdline example:
|
||||
//containerd-shim -namespace moby
|
||||
|
@ -587,7 +572,7 @@ std::shared_ptr<ProcessTreeItem> ProcessTree::ReadProcEntry(int pid)
|
|||
|
||||
void ProcessTree::ShowTree()
|
||||
{
|
||||
for (auto p : _processes) {
|
||||
for (auto& p : _processes) {
|
||||
ShowProcess(p.second);
|
||||
for (auto c : p.second->_children) {
|
||||
if (_processes.count(c) > 0) {
|
||||
|
|
|
@ -109,7 +109,7 @@ public:
|
|||
void AddPnForkQueue(int pid, int ppid);
|
||||
void AddPnExecQueue(int pid);
|
||||
void AddPnExitQueue(int pid);
|
||||
std::shared_ptr<ProcessTreeItem> AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, std::string exe, const std::string& cmdline);
|
||||
std::shared_ptr<ProcessTreeItem> AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, const std::string& exe, const std::string& cmdline);
|
||||
void Clean();
|
||||
std::shared_ptr<ProcessTreeItem> GetInfoForPid(int pid);
|
||||
void PopulateTree();
|
||||
|
@ -125,13 +125,10 @@ private:
|
|||
void AddPid(int pid, int ppid);
|
||||
void AddPid(int pid);
|
||||
void RemovePid(int pid);
|
||||
std::string ReadFirstLine(const std::string& file);
|
||||
std::string ReadParam(const std::string& file, const std::string& param);
|
||||
std::shared_ptr<ProcessTreeItem> ReadProcEntry(int pid);
|
||||
bool is_number(char *s);
|
||||
void ApplyFlags(std::shared_ptr<ProcessTreeItem> process);
|
||||
void SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string containerid);
|
||||
std::string ExtractContainerId(std::string exe, const std::string& cmdline);
|
||||
void ApplyFlags(const std::shared_ptr<ProcessTreeItem>& process);
|
||||
void SetContainerId(const std::shared_ptr<ProcessTreeItem>& p, const std::string& containerid);
|
||||
std::string ExtractContainerId(const std::string& exe, const std::string& cmdline);
|
||||
|
||||
std::shared_ptr<UserDB> _user_db;
|
||||
std::shared_ptr<FiltersEngine> _filtersEngine;
|
||||
|
|
|
@ -626,6 +626,9 @@ bool RawEventProcessor::process_syscall_event(const Event& event) {
|
|||
std::string cmdline;
|
||||
|
||||
if (!_syscall.empty() && starts_with(_syscall, "execve")) {
|
||||
if (!exe.empty() && exe.front() == '"' && exe.back() == '"') {
|
||||
exe = exe.substr(1, exe.length() - 2);
|
||||
}
|
||||
p = _processTree->AddProcess(ProcessTreeSource_execve, _pid, _ppid, uid, gid, exe, _cmdline);
|
||||
} else if (!_syscall.empty()) {
|
||||
p = _processTree->GetInfoForPid(_pid);
|
||||
|
|
16
auoms.cpp
16
auoms.cpp
|
@ -249,17 +249,27 @@ int main(int argc, char**argv) {
|
|||
lock_file = config.GetString("lock_file");
|
||||
}
|
||||
|
||||
uint64_t rss_limit = 384*1024*1024;
|
||||
uint64_t virt_limit = 1024*1024*1024;
|
||||
uint64_t rss_limit = 1024L*1024L*1024L;
|
||||
uint64_t virt_limit = 2048L*1024L*1024L;
|
||||
double rss_pct_limit = 5;
|
||||
double virt_pct_limit = 30;
|
||||
|
||||
if (config.HasKey("rss_limit")) {
|
||||
rss_limit = config.GetUint64("rss_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("rss_pct_limit")) {
|
||||
rss_pct_limit = config.GetDouble("rss_pct_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("virt_limit")) {
|
||||
virt_limit = config.GetUint64("virt_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("virt_pct_limit")) {
|
||||
virt_pct_limit = config.GetDouble("virt_pct_limit");
|
||||
}
|
||||
|
||||
bool use_syslog = true;
|
||||
if (config.HasKey("use_syslog")) {
|
||||
use_syslog = config.GetBool("use_syslog");
|
||||
|
@ -328,7 +338,7 @@ int main(int argc, char**argv) {
|
|||
auto system_metrics = std::make_shared<SystemMetrics>(metrics);
|
||||
system_metrics->Start();
|
||||
|
||||
auto proc_metrics = std::make_shared<ProcMetrics>("auoms", queue, metrics, rss_limit, virt_limit, []() {
|
||||
auto proc_metrics = std::make_shared<ProcMetrics>("auoms", queue, metrics, rss_limit, virt_limit, rss_pct_limit, virt_pct_limit, []() {
|
||||
Logger::Error("A memory limit was exceeded, exiting immediately");
|
||||
exit(1);
|
||||
});
|
||||
|
|
|
@ -427,17 +427,27 @@ int main(int argc, char**argv) {
|
|||
lock_file = config.GetString("lock_file");
|
||||
}
|
||||
|
||||
uint64_t rss_limit = 256*1024*1024;
|
||||
uint64_t virt_limit = 1024*1024*1024;
|
||||
uint64_t rss_limit = 256L*1024L*1024L;
|
||||
uint64_t virt_limit = 1024L*1024L*1024L;
|
||||
double rss_pct_limit = 2;
|
||||
double virt_pct_limit = 15;
|
||||
|
||||
if (config.HasKey("rss_limit")) {
|
||||
rss_limit = config.GetUint64("rss_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("rss_pct_limit")) {
|
||||
rss_pct_limit = config.GetDouble("rss_pct_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("virt_limit")) {
|
||||
virt_limit = config.GetUint64("virt_limit");
|
||||
}
|
||||
|
||||
if (config.HasKey("virt_pct_limit")) {
|
||||
virt_pct_limit = config.GetDouble("virt_pct_limit");
|
||||
}
|
||||
|
||||
int cpu_nice = -20;
|
||||
if (config.HasKey("cpu_nice")) {
|
||||
cpu_nice = config.GetInt64("cpu_nice");
|
||||
|
@ -509,7 +519,7 @@ int main(int argc, char**argv) {
|
|||
auto metrics = std::make_shared<Metrics>(queue);
|
||||
metrics->Start();
|
||||
|
||||
auto proc_metrics = std::make_shared<ProcMetrics>("auomscollect", queue, metrics, rss_limit, virt_limit, []() {
|
||||
auto proc_metrics = std::make_shared<ProcMetrics>("auomscollect", queue, metrics, rss_limit, virt_limit, rss_pct_limit, virt_pct_limit, []() {
|
||||
Logger::Error("A memory limit was exceeded, exiting immediately");
|
||||
exit(1);
|
||||
});
|
||||
|
|
Загрузка…
Ссылка в новой задаче