Improve mem metrics, clean up process tree code

This commit is contained in:
Tad Glines 2020-08-25 09:13:20 -07:00
Родитель 3ac03d57d1
Коммит e82bb64a17
7 изменённых файлов: 72 добавлений и 45 удалений

Просмотреть файл

@ -91,15 +91,19 @@ bool ProcMetrics::collect_metrics() {
Logger::Error("Failed to parse /proc/self/statm");
return false;
}
} else {
Logger::Error("Failed to read /proc/self/statm: No contents!");
return false;
}
} catch (std::exception& ex) {
Logger::Error("Failed to read /proc/self/statm: %s", ex.what());
return false;
}
auto mem_pct = (static_cast<double>(resident*_page_size)/static_cast<double>(_total_system_memory))*100.0;
uint64_t rss = resident*_page_size;
uint64_t virt = total*_page_size;
auto rss_mem_pct = (static_cast<double>(rss)/static_cast<double>(_total_system_memory))*100.0;
auto virt_mem_pct = (static_cast<double>(virt)/static_cast<double>(_total_system_memory))*100.0;
if (rss > _rss_limit) {
Logger::Error("RSS Limit (%ld) exceeded (%ld)", _rss_limit, rss);
@ -107,13 +111,25 @@ bool ProcMetrics::collect_metrics() {
return false;
}
if (rss_mem_pct > _rss_pct_limit) {
Logger::Error("RSS %%MEM Limit (%lf) exceeded (%lf)", _rss_pct_limit, rss_mem_pct);
_limit_fn();
return false;
}
if (virt > _virt_limit) {
Logger::Error("Virt Limit (%ld) exceeded (%ld)", _virt_limit, virt);
_limit_fn();
return false;
}
_mem_pct_metric->Update(mem_pct);
if (virt_mem_pct > _virt_pct_limit) {
Logger::Error("Virt %%MEM Limit (%lf) exceeded (%lf)", _virt_pct_limit, virt_mem_pct);
_limit_fn();
return false;
}
_mem_pct_metric->Update(rss_mem_pct);
_rss_metric->Update(static_cast<double>(rss));
_virt_metric->Update(static_cast<double>(virt));
@ -124,13 +140,17 @@ bool ProcMetrics::collect_metrics() {
for (auto& line: lines) {
if (starts_with(line, "read_bytes: ")) {
if (std::sscanf(lines[0].c_str(), "read_bytes: %lu", &read_bytes) != 1) {
Logger::Error("Failed to parse /proc/self/io");
try {
read_bytes = std::stoul(line.substr(12));
} catch (std::exception& ex) {
Logger::Error("Failed to parse read_bytes in /proc/self/io: %s", ex.what());
return false;
}
} else if (starts_with(line, "write_bytes: ")) {
if (std::sscanf(lines[0].c_str(), "write_bytes: %lu", &write_bytes) != 1) {
Logger::Error("Failed to parse /proc/self/io");
try {
write_bytes = std::stoul(line.substr(13));
} catch (std::exception& ex) {
Logger::Error("Failed to parse write_bytes in /proc/self/io: %s", ex.what());
return false;
}
}

Просмотреть файл

@ -54,8 +54,8 @@ private:
class ProcMetrics: public RunBase {
public:
ProcMetrics(const std::string& nsname, const std::shared_ptr<PriorityQueue>& queue, const std::shared_ptr<Metrics>& metrics, uint64_t rss_limit, uint64_t virt_limit, std::function<void()> limit_fn)
: _queue(queue), _metrics(metrics), _rss_limit(rss_limit), _virt_limit(virt_limit), _limit_fn(std::move(limit_fn)), _total_system_memory(0), _page_size(0), _clock(0),
ProcMetrics(const std::string& nsname, const std::shared_ptr<PriorityQueue>& queue, const std::shared_ptr<Metrics>& metrics, uint64_t rss_limit, uint64_t virt_limit, double rss_pct_limit, double virt_pct_limit, std::function<void()> limit_fn)
: _queue(queue), _metrics(metrics), _rss_limit(rss_limit), _virt_limit(virt_limit), _rss_pct_limit(rss_pct_limit), _virt_pct_limit(virt_pct_limit), _limit_fn(std::move(limit_fn)), _total_system_memory(0), _page_size(0), _clock(0),
_queue_total_metrics(metrics, nsname, "queue.total.")
{
_cpu_metric = _metrics->AddMetric(MetricType::METRIC_BY_FILL, nsname, "%cpu", MetricPeriod::SECOND, MetricPeriod::HOUR);
@ -84,6 +84,8 @@ private:
std::shared_ptr<Metrics> _metrics;
uint64_t _rss_limit;
uint64_t _virt_limit;
double _rss_pct_limit;
double _virt_pct_limit;
std::function<void()> _limit_fn;
uint64_t _total_system_memory;
long _page_size;

Просмотреть файл

@ -265,15 +265,11 @@ void ProcessTree::AddPid(int pid)
/* Process event from AuditD (execve)
Make a new entry (deleting any existing entry).
*/
std::shared_ptr<ProcessTreeItem> ProcessTree::AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, std::string exe, const std::string &cmdline)
std::shared_ptr<ProcessTreeItem> ProcessTree::AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, const std::string& exe, const std::string &cmdline)
{
std::unique_lock<std::mutex> process_write_lock(_process_write_mutex);
std::shared_ptr<ProcessTreeItem> process;
if (exe[0] == '"' && exe.back() == '"') {
exe = exe.substr(1, exe.length() - 2);
}
std::string containerid = ExtractContainerId(exe, cmdline);
auto it = _processes.find(pid);
if (it != _processes.end()) {
@ -419,18 +415,7 @@ std::shared_ptr<ProcessTreeItem> ProcessTree::GetInfoForPid(int pid)
}
}
/*
   Report whether the NUL-terminated string s consists solely of decimal digits.
   An empty string contains no non-digit characters and therefore yields true.
*/
bool ProcessTree::is_number(char *s)
{
    for (char *t = s; *t != 0; t++) {
        // isdigit() is undefined for negative arguments other than EOF, and plain
        // char may be signed, so convert through unsigned char before the call.
        if (!isdigit(static_cast<unsigned char>(*t))) {
            return false;
        }
    }
    return true;
}
void ProcessTree::ApplyFlags(std::shared_ptr<ProcessTreeItem> process)
void ProcessTree::ApplyFlags(const std::shared_ptr<ProcessTreeItem>& process)
{
unsigned int height = 0;
process->_flags = _filtersEngine->GetFlags(process, height);
@ -476,7 +461,7 @@ void ProcessTree::PopulateTree()
_processes[pid] = process;
}
for (auto p : _processes) {
for (auto& p : _processes) {
auto process = p.second;
auto it = _processes.find(process->_ppid);
if (it != _processes.end()) {
@ -484,7 +469,7 @@ void ProcessTree::PopulateTree()
}
}
for (auto p : _processes) {
for (auto& p : _processes) {
std::shared_ptr<ProcessTreeItem> process, parent;
process = p.second;
auto it = _processes.find(process->_ppid);
@ -504,7 +489,7 @@ void ProcessTree::PopulateTree()
}
}
// Populate containerid
for (auto p : _processes) {
for (auto& p : _processes) {
auto process = p.second;
if( !(process->_containeridfromhostprocess).empty()) {
SetContainerId(process, process->_containeridfromhostprocess);
@ -515,7 +500,7 @@ void ProcessTree::PopulateTree()
void ProcessTree::UpdateFlags() {
std::unique_lock<std::mutex> process_write_lock(_process_write_mutex);
for (auto p : _processes) {
for (auto& p : _processes) {
ApplyFlags(p.second);
}
}
@ -523,7 +508,7 @@ void ProcessTree::UpdateFlags() {
// This utility method gets called only during the initial population of ProcessTree when a containerid shim process is identified with a non-empty value of _containeridfromhostprocess.
// All of its children are assigned the ContainerId value recursively.
// ContainerId is not set for the containerid shim process.
void ProcessTree::SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string containerid)
void ProcessTree::SetContainerId(const std::shared_ptr<ProcessTreeItem>& p, const std::string& containerid)
{
for (auto c : p->_children) {
auto it2 = _processes.find(c);
@ -535,7 +520,7 @@ void ProcessTree::SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string
}
}
std::string ProcessTree::ExtractContainerId(std::string exe, const std::string& cmdline)
std::string ProcessTree::ExtractContainerId(const std::string& exe, const std::string& cmdline)
{
// cmdline example:
//containerd-shim -namespace moby
@ -587,7 +572,7 @@ std::shared_ptr<ProcessTreeItem> ProcessTree::ReadProcEntry(int pid)
void ProcessTree::ShowTree()
{
for (auto p : _processes) {
for (auto& p : _processes) {
ShowProcess(p.second);
for (auto c : p.second->_children) {
if (_processes.count(c) > 0) {

Просмотреть файл

@ -109,7 +109,7 @@ public:
void AddPnForkQueue(int pid, int ppid);
void AddPnExecQueue(int pid);
void AddPnExitQueue(int pid);
std::shared_ptr<ProcessTreeItem> AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, std::string exe, const std::string& cmdline);
std::shared_ptr<ProcessTreeItem> AddProcess(enum ProcessTreeSource source, int pid, int ppid, int uid, int gid, const std::string& exe, const std::string& cmdline);
void Clean();
std::shared_ptr<ProcessTreeItem> GetInfoForPid(int pid);
void PopulateTree();
@ -125,13 +125,10 @@ private:
void AddPid(int pid, int ppid);
void AddPid(int pid);
void RemovePid(int pid);
std::string ReadFirstLine(const std::string& file);
std::string ReadParam(const std::string& file, const std::string& param);
std::shared_ptr<ProcessTreeItem> ReadProcEntry(int pid);
bool is_number(char *s);
void ApplyFlags(std::shared_ptr<ProcessTreeItem> process);
void SetContainerId(std::shared_ptr<ProcessTreeItem> p, std::string containerid);
std::string ExtractContainerId(std::string exe, const std::string& cmdline);
void ApplyFlags(const std::shared_ptr<ProcessTreeItem>& process);
void SetContainerId(const std::shared_ptr<ProcessTreeItem>& p, const std::string& containerid);
std::string ExtractContainerId(const std::string& exe, const std::string& cmdline);
std::shared_ptr<UserDB> _user_db;
std::shared_ptr<FiltersEngine> _filtersEngine;

Просмотреть файл

@ -626,6 +626,9 @@ bool RawEventProcessor::process_syscall_event(const Event& event) {
std::string cmdline;
if (!_syscall.empty() && starts_with(_syscall, "execve")) {
if (!exe.empty() && exe.front() == '"' && exe.back() == '"') {
exe = exe.substr(1, exe.length() - 2);
}
p = _processTree->AddProcess(ProcessTreeSource_execve, _pid, _ppid, uid, gid, exe, _cmdline);
} else if (!_syscall.empty()) {
p = _processTree->GetInfoForPid(_pid);

Просмотреть файл

@ -249,17 +249,27 @@ int main(int argc, char**argv) {
lock_file = config.GetString("lock_file");
}
uint64_t rss_limit = 384*1024*1024;
uint64_t virt_limit = 1024*1024*1024;
uint64_t rss_limit = 1024L*1024L*1024L;
uint64_t virt_limit = 2048L*1024L*1024L;
double rss_pct_limit = 5;
double virt_pct_limit = 30;
if (config.HasKey("rss_limit")) {
rss_limit = config.GetUint64("rss_limit");
}
if (config.HasKey("rss_pct_limit")) {
rss_pct_limit = config.GetDouble("rss_pct_limit");
}
if (config.HasKey("virt_limit")) {
virt_limit = config.GetUint64("virt_limit");
}
if (config.HasKey("virt_pct_limit")) {
virt_pct_limit = config.GetDouble("virt_pct_limit");
}
bool use_syslog = true;
if (config.HasKey("use_syslog")) {
use_syslog = config.GetBool("use_syslog");
@ -328,7 +338,7 @@ int main(int argc, char**argv) {
auto system_metrics = std::make_shared<SystemMetrics>(metrics);
system_metrics->Start();
auto proc_metrics = std::make_shared<ProcMetrics>("auoms", queue, metrics, rss_limit, virt_limit, []() {
auto proc_metrics = std::make_shared<ProcMetrics>("auoms", queue, metrics, rss_limit, virt_limit, rss_pct_limit, virt_pct_limit, []() {
Logger::Error("A memory limit was exceeded, exiting immediately");
exit(1);
});

Просмотреть файл

@ -427,17 +427,27 @@ int main(int argc, char**argv) {
lock_file = config.GetString("lock_file");
}
uint64_t rss_limit = 256*1024*1024;
uint64_t virt_limit = 1024*1024*1024;
uint64_t rss_limit = 256L*1024L*1024L;
uint64_t virt_limit = 1024L*1024L*1024L;
double rss_pct_limit = 2;
double virt_pct_limit = 15;
if (config.HasKey("rss_limit")) {
rss_limit = config.GetUint64("rss_limit");
}
if (config.HasKey("rss_pct_limit")) {
rss_pct_limit = config.GetDouble("rss_pct_limit");
}
if (config.HasKey("virt_limit")) {
virt_limit = config.GetUint64("virt_limit");
}
if (config.HasKey("virt_pct_limit")) {
virt_pct_limit = config.GetDouble("virt_pct_limit");
}
int cpu_nice = -20;
if (config.HasKey("cpu_nice")) {
cpu_nice = config.GetInt64("cpu_nice");
@ -509,7 +519,7 @@ int main(int argc, char**argv) {
auto metrics = std::make_shared<Metrics>(queue);
metrics->Start();
auto proc_metrics = std::make_shared<ProcMetrics>("auomscollect", queue, metrics, rss_limit, virt_limit, []() {
auto proc_metrics = std::make_shared<ProcMetrics>("auomscollect", queue, metrics, rss_limit, virt_limit, rss_pct_limit, virt_pct_limit, []() {
Logger::Error("A memory limit was exceeded, exiting immediately");
exit(1);
});