Quieten GPU and docker probing on nodes that lack those tools.

* nodemanager/core/Monitor.cpp: discard nvidia-smi's stderr and log a
  warning when the command returns non-zero, instead of always logging
  the return code.
* nodemanager/scripts/CleanupAllTasks.sh: send "docker version" output to
  /dev/null (the old target was the misspelled /dev/nul) and discard its
  stderr as well.

diff --git a/nodemanager/core/Monitor.cpp b/nodemanager/core/Monitor.cpp
index 4ee80ee..a62ae41 100644
--- a/nodemanager/core/Monitor.cpp
+++ b/nodemanager/core/Monitor.cpp
@@ -31,10 +31,13 @@ Monitor::Monitor(const std::string& nodeName, const std::string& netName, int in
     std::get<0>(this->metricData[3]) = 0;
     std::get<0>(this->metricData[12]) = 1;
 
-    Logger::Info("Initializing GPU driver.");
+    Logger::Info("Checking nvidia-smi...");
     std::string output;
-    this->gpuInitRet = System::ExecuteCommandOut(output, "nvidia-smi -pm 1");
-    Logger::Info("Initialize GPU ret code {0}", this->gpuInitRet);
+    this->gpuInitRet = System::ExecuteCommandOut(output, "nvidia-smi -pm 1 2>/dev/null");
+    if (this->gpuInitRet != 0)
+    {
+        Logger::Warn("GPU metrics will not be collected.");
+    }
 
     this->collectors["\\Processor\\% Processor Time"] = std::make_shared([this] (const std::string& instanceName)
     {
diff --git a/nodemanager/scripts/CleanupAllTasks.sh b/nodemanager/scripts/CleanupAllTasks.sh
index 16c8e14..eb75728 100644
--- a/nodemanager/scripts/CleanupAllTasks.sh
+++ b/nodemanager/scripts/CleanupAllTasks.sh
@@ -2,7 +2,7 @@
 
 . common.sh
 
-docker version > /dev/nul
+docker version >/dev/null 2>&1
 if [ $? -eq 0 ]; then
     echo "Cleaning up docker containers..."
     containers=$(docker ps -a -q -f name=^/$(GetContainerName))