642 строки
26 KiB
C++
642 строки
26 KiB
C++
#ifndef VERSION_H_INCLUDED
|
|
#define VERSION_H_INCLUDED
|
|
|
|
#include <string>
|
|
#include <map>
|
|
#include <vector>
|
|
#include <iostream>
|
|
|
|
namespace hpc
|
|
{
|
|
/// Static registry of the node manager's release history.
///
/// The history is an ordered list of (version string, change notes) pairs,
/// oldest release first. The last entry is treated as the current version.
class Version
{
public:
    /// Returns the full release history, oldest entry first.
    ///
    /// The table is built once, on the first call, and the same object is
    /// returned by reference on every subsequent call. Each version string
    /// appears exactly once.
    ///
    /// @return Reference to the (version, change notes) list.
    static const std::vector<std::pair<std::string, std::vector<std::string>>>& GetVersionHistory()
    {
        // const: the table is only ever handed out through a const reference.
        static const std::vector<std::pair<std::string, std::vector<std::string>>> versionHistory =
        {
            { "1.1.1.1",
                {
                    "Node manager main functionality support",
                    "Added version support",
                    "Fixed network card reversed order issue",
                    "Added trace",
                    "Added error codes definition",
                    "Fixed a potential node error issue",
                }
            },
            { "1.1.1.2",
                {
                    "Fixed a long running issue because of callback failure",
                    "Added version history support",
                }
            },
            { "1.1.1.3",
                {
                    "Fixed a long running issue because of callback contract mismatch",
                }
            },
            { "1.1.1.4",
                {
                    "Retry when create cgroup failed",
                    "Return the exit code and error message when PrepareTask",
                    "Record the output to message in Process",
                }
            },
            { "1.1.1.5",
                {
                    "Print out version history",
                }
            },
            { "1.1.1.6",
                {
                    "Changed the process chain which will handle the user permission, std streams better",
                    "Added utilities to handle users",
                    "Added unit test framework",
                    "Added test case for some echo Process",
                    "Fixed a bug that a requeued task cannot be canceled",
                    "Fixed a bug that json not-existing value is handled incorrectly",
                }
            },
            { "1.1.1.7",
                {
                    "Make file dependency auto detect",
                }
            },
            { "1.1.1.8",
                {
                    "Set timeout to let callback release locks to avoid deadlock",
                    "Request resync when callback fails",
                    "Adjust the maximum log size to avoid flushing important logs",
                    "Fixed the detection method of cgroup in CentOS 6.6",
                    "Make the return message of a task more verbose",
                    "Verify the cgroup creation status at the end of PrepareTask",
                    "Adjusted makefile output format",
                }
            },
            { "1.1.1.9",
                {
                    "Fixed the default working directory ownership",
                    "Write to the task message and log only when failure happened",
                    "Configure CentOS sudoers to allow non-tty execution",
                    "Changed test user name in unit test case",
                    "Won't delete the user in job ending to avoid multi-job interference",
                }
            },
            { "1.1.1.10",
                {
                    "Try to fix a thread pool leak issue",
                    "Support CGroup mounting point on CentOS 6.6",
                }
            },
            { "1.1.1.11",
                {
                    "Fix EndTask race condition with Task Completion",
                    "Order the version display",
                }
            },
            { "1.2.1.1",
                {
                    "Support CPU affinity",
                    "Cleanup the user ssh keys after job finish",
                    "Report the signal which kills the process",
                    "Fix a user name mis-match issue",
                }
            },
            { "1.2.1.2",
                {
                    "Wait for the node trust before running mpi job",
                }
            },
            { "1.2.1.3",
                {
                    "Run as root when the user is admin on head node",
                    "Change monitoring data to go through UDP packet",
                }
            },
            { "1.2.1.4",
                {
                    "Fix the monitoring data big-endian issue",
                }
            },
            { "1.2.1.5",
                {
                    "Fix the report uri cannot be resolved issue",
                }
            },
            { "1.2.1.6",
                {
                    "Correct the authorized_keys file permission",
                    "Report resource usage through cgroup statistics",
                    "Added Unit test for process count",
                }
            },
            { "1.2.1.7",
                {
                    "Set correct permissions on key files",
                }
            },
            { "1.2.1.8",
                {
                    "Remove the user even if it is logged on",
                    "Won't overwrite the keys if private key is not created",
                    "Work around the bug of removing multi-subsys cgroup in old version of CGroup tools",
                    "Fix the network usage monitoring issue",
                }
            },
            { "1.2.1.9",
                {
                    "Placeholder for linux extension bug fix",
                }
            },
            { "1.2.1.10",
                {
                    "Added logs for users left problem",
                }
            },
            { "1.2.1.11",
                {
                    "Treat the return value of the heart beat to adjust interval",
                }
            },
            { "1.2.1.12",
                {
                    "Fix the virtual method initialization race condition",
                }
            },
            { "1.2.1.13",
                {
                    "Fix the crashing issue of not properly initialized thread ID",
                    "Fix the trust failure issue caused by a wrongly specified node name",
                }
            },
            { "1.2.1.14",
                {
                    "Kill all processes associated with the user before deleting user",
                }
            },
            { "1.2.1.15",
                {
                    "Fix a crash issue due to the reporter thread race condition",
                    "Retry when the CGroup tools exit with code 82 and 96, CGroup tools bug",
                    "Change wait for trust period to 30 seconds",
                    "Keep all untrusted history",
                }
            },
            { "1.2.1.16",
                {
                    "Retry for permission issue of stdout and stderr",
                    "Refine the logic of keeping the state when trust failed",
                    "Fix a crash issue of reporter",
                }
            },
            { "1.2.1.17",
                {
                    "Keep the task folder in failed situation",
                    "Test stdout/stderr file access before and after the task run",
                    "Keep the trust related logs in separate folders per tasks",
                    "Enhance the log of wait for trust",
                }
            },
            { "1.2.1.18",
                {
                    "Skip deleting users",
                }
            },
            { "1.2.1.19",
                {
                    "Enhance logs for trouble shooting",
                }
            },
            { "1.2.1.20",
                {
                    "Enhance wait for trust logs for trouble shooting",
                }
            },
            { "1.2.1.21",
                {
                    "Capture the error code for popen error",
                    "Tune the wait for trust period",
                }
            },
            { "1.2.1.22",
                {
                    "Turned on ssh verbose log",
                }
            },
            { "1.2.1.23",
                {
                    "Prints ssh verbose log directly",
                }
            },
            { "1.3.1.1",
                {
                    "Add graceful preemption support",
                    "Prevent the common user with root name to run as root",
                }
            },
            { "1.3.1.2",
                {
                    "Kill the process immediately after the period expires",
                }
            },
            { "1.3.1.3",
                {
                    "Cleaning up zombie processes when startup",
                }
            },
            { "1.3.1.4",
                {
                    "Enable clusrun support",
                }
            },
            { "1.3.1.5",
                {
                    "Fix the stdout/stderr/stdin handling issues",
                }
            },
            { "1.3.1.6",
                {
                    "Fix the virtual method initialization race condition",
                    "Print meaningful error messages when node trust fails",
                }
            },
            { "1.3.1.7",
                {
                    "Exit immediately if the port cannot be opened",
                }
            },
            { "1.3.1.8",
                {
                    "Merging with 1.2",
                }
            },
            { "1.3.1.9",
                {
                    "Fix the exit code not captured issue",
                    "Terminate the task properly when end task called",
                }
            },
            { "1.3.1.10",
                {
                    "Avoid duplicated cleanup of zombie process",
                    "Fixed a crash due to merge of the code",
                    "Print an error message when node trust failed",
                }
            },
            { "1.3.1.11",
                {
                    "Added logs for trust process",
                }
            },
            { "1.3.1.12",
                {
                    "Work around a ssh command line issue",
                    "Removed an unnecessary error message",
                }
            },
            { "1.3.1.13",
                {
                    "Fix the network usage collection error on CentOS65",
                    "Fix the network usage data not precise issue",
                    "Avoid sending the EOF output back when not using stream output",
                }
            },
            { "1.3.1.14",
                {
                    "Clear the environment buffer when rerun",
                    "Inherit the path from the user specified",
                    "Change the node manager port",
                }
            },
            { "1.3.1.15",
                {
                    "Change the wait time for trust",
                    "Cancel the graceful period thread if the task completed by itself",
                }
            },
            { "1.4.1.1",
                {
                    "Fix a bug to remember the graceful period thread Id",
                    "Terminate the task forcefully if the graceful period is 0",
                }
            },
            { "1.4.2.1",
                {
                    "Change the kill wait time to 1s",
                    "Only terminate the original process key when graceful period elapsed",
                    "Cleanup the whole CGroup when the main process exit",
                }
            },
            { "1.4.3.1",
                {
                    "Fix a security issue or running as root when the user has no privilege",
                }
            },
            { "1.4.4.1",
                {
                    "Fix the memory leak caused by deadlock",
                    "Work around the cgroup creation failure",
                    "Tuned the wait time of trust",
                }
            },
            // Listed once: an identical second "1.4.5.1" entry used to follow
            // this one and made the release show up twice in the printout.
            { "1.4.5.1",
                {
                    "Fix the trust script sub process id capture issue",
                }
            },
            { "1.5.1.0",
                {
                    "VM extension script fix",
                }
            },
            { "1.6.1.0",
                {
                    "Cleanup warnings from ShellCheck tool",
                }
            },
            { "1.6.2.0",
                {
                    "Adding a configuration file nodemanager.json",
                    "Add the authentication key support",
                    "Add the https support",
                }
            },
            { "1.6.3.0",
                {
                    "Metric configuration support (framework)",
                }
            },
            { "1.6.4.0",
                {
                    "Enabled customized CA",
                }
            },
            { "1.6.5.0",
                {
                    "Added debug mode",
                    "Added several metrics plugin",
                }
            },
            { "1.6.6.0",
                {
                    "Bind to any address when listening",
                }
            },
            { "1.6.7.0",
                {
                    "Fix network usage collection issue",
                }
            },
            { "1.6.8.0",
                {
                    "Change the Casablanca to use the construction time callback",
                    "Fix the environment PATH issue",
                }
            },
            { "1.6.9.0",
                {
                    "Remove the nginx dependency, support https natively",
                }
            },
            { "1.6.10.0",
                {
                    "Fix a reporting thread crash issue",
                }
            },
            { "1.6.11.0",
                {
                    "Fix a node error issue caused by rare case deadlock",
                    "Upgrade to latest spdlog library",
                }
            },
            { "1.6.12.0",
                {
                    "Added hosts file support",
                }
            },
            { "1.6.13.0",
                {
                    "Fix a deadlock caused by the input string stream reading",
                }
            },
            { "1.6.14.0",
                {
                    "Give the statistics an initial value to avoid overflow in scheduler database",
                }
            },
            { "1.6.15.0",
                {
                    "Fix a security issue which will disclose secrets",
                }
            },
            { "1.6.16.0",
                {
                    "Atomically change the configuration file",
                }
            },
            { "1.6.17.0",
                {
                    "Fix the host file manager bug",
                }
            },
            { "1.6.18.0",
                {
                    "Fix the cgroup root path issue",
                }
            },
            { "1.6.19.0",
                {
                    "Upgrade to use the latest cpprestsdk",
                    "Fix a memory leak issue in the cpprestsdk",
                }
            },
            { "1.7.1.0",
                {
                    "Added the execution filter support",
                    "Added unit test for execution filter",
                    "Improved the unit test framework",
                    "Fixed some problems in json format processing",
                }
            },
            { "1.7.2.0",
                {
                    "Fix the node unreachable error",
                    "Detect user's home directory instead of hard code",
                    "Skip the plugin if the file doesn't exist",
                    "Pass home directory to user's process",
                    "Added environment variable controlling bypass behavior of domain name on user name",
                }
            },
            { "1.7.3.0",
                {
                    "Fix two node manager crash issues",
                }
            },
            { "1.7.4.0",
                {
                    "Fix one node manager crash issue",
                }
            },
            { "1.7.5.0",
                {
                    "Fix one node manager crash issue",
                    "Fix the TaskStart filter",
                    "Fix a security issue",
                    "Fix a typo in the log",
                }
            },
            { "1.7.6.0",
                {
                    "Ignore the JobEndFilter exception",
                    "Cleanup the execution filter folder",
                }
            },
            { "1.7.7.0",
                {
                    "Retry the heartbeat when failed to minimize heartbeat lost",
                    "Isolate the filter execution from task execution",
                    "Refine the folder name of filter execution directory",
                }
            },
            { "1.7.8.0",
                {
                    "Report instance level GPU metrics",
                    "Register with GPU information",
                }
            },
            { "1.7.9.0",
                {
                    "Fix a metric packet count issue",
                }
            },
            { "1.7.10.0",
                {
                    "Fix a bug of total memory data in metrics",
                }
            },
            { "1.7.11.0",
                {
                    "Fix a crash issue when cancelling a job",
                    "Fix a process statistics issue when cancelling a job which results in the job stuck in cancelling state",
                }
            },
            { "2.1.1.0",
                {
                    "Hpc Pack 2016 head node support",
                    "Dynamically resolve the Uris of the head node services",
                }
            },
            { "2.1.2.0",
                {
                    "Fix the unknown node availability issue",
                }
            },
            { "2.1.3.0",
                {
                    "Fix the node error issue when scheduler failover",
                }
            },
            { "2.1.4.0",
                {
                    "Fix a job stuck in running issue",
                }
            },
            { "2.1.5.0",
                {
                    "Fix a resync failure issue, caused job stuck in running",
                }
            },
            { "2.1.6.0",
                {
                    "Fix the task process never start issue",
                }
            },
            { "2.2.1.0",
                {
                    "Added built-in proxy support",
                }
            },
            { "2.2.2.0",
                {
                    "Added task completion uri configuration",
                }
            },
            { "2.3.1.0",
                {
                    "Added docker support",
                    "Added support to peek the task output",
                    "Fixed the unit test failure",
                }
            },
            { "2.3.2.0",
                {
                    "Removed the requirement of public key in extended data of user credential",
                    "Removed unnecessary warning log of GPU monitors",
                    "Fixed the issue that user credential information may be left on disk when using execution filter",
                    "Fixed a bug that root user may fail in mutual trust in mpi docker task",
                }
            },
            { "2.3.3.0",
                {
                    "Modified user mapping logic",
                }
            },
            { "2.3.4.0",
                {
                    "Fixed a bug that task would fail when cgroup is not enabled",
                }
            },
            { "3.1.1.0",
                {
                    "Support for HPC-ACM.",
                }
            },
            { "3.1.2.0",
                {
                    "Make the JobId part of the task execution id",
                }
            },
        };

        return versionHistory;
    }

    /// Returns the current version, i.e. the version string of the last
    /// entry in the history table.
    ///
    /// @return Reference to the version string stored in the history table.
    static const std::string& GetVersion()
    {
        // The table is a non-empty literal, so back() is always valid here.
        return GetVersionHistory().back().first;
    }

    /// Writes the whole history to std::cout.
    ///
    /// For each release, in table order: the version string, a separator
    /// line, the change notes numbered from 1, and a blank line.
    static void PrintVersionHistory()
    {
        for (const auto& entry : GetVersionHistory())
        {
            std::cout << entry.first << '\n';
            std::cout << "================================================================" << '\n';

            int number = 0;
            for (const auto& message : entry.second)
            {
                number++;
                std::cout << number << ". " << message << '\n';
            }

            std::cout << '\n';
        }

        // One flush for the whole report instead of one per line.
        std::cout << std::flush;
    }
};
|
|
}
|
|
|
|
#endif // VERSION_H_INCLUDED
|