This commit is contained in:
zclok010 2019-05-23 11:46:49 +08:00
Родитель 6a7ef893c6
Коммит 7c94523eb2
7 изменённых файлов: 103 добавлений и 58 удалений

Просмотреть файл

@ -144,14 +144,12 @@ void Process::OnCompletedInternal()
void* Process::ForkThread(void* arg)
{
Process* const p = static_cast<Process* const>(arg);
int ret;
std::string path;
auto dockerImageIt = p->environments.find("CCP_DOCKER_IMAGE");
bool isDockerTask = dockerImageIt != p->environments.end() && !dockerImageIt->second.empty();
auto disableCgroupIt = p->environments.find("CCP_DISABLE_CGROUP");
bool disableCgroup = disableCgroupIt != p->environments.end() && disableCgroupIt->second == "1";
std::string envFile;
Start:
int ret = p->CreateTaskFolder();
ret = p->CreateTaskFolder();
if (ret != 0)
{
p->message << "Task " << p->taskId << ": error when create task folder, ret " << ret << std::endl;
@ -171,33 +169,18 @@ Start:
goto Final;
}
if (isDockerTask)
p -> environmentsBuffer.clear();
std::transform(
p->environments.cbegin(),
p->environments.cend(),
std::back_inserter(p->environmentsBuffer),
[](const auto& v) { return String::Join("=", v.first, v.second); });
envFile = p->taskFolder + "/environments";
ret = System::WriteStringToFile(envFile, String::Join<'\n'>(p->environmentsBuffer));
if (ret != 0)
{
p -> environmentsBuffer.clear();
std::transform(
p->environments.cbegin(),
p->environments.cend(),
std::back_inserter(p->environmentsBuffer),
[](const auto& v) { return String::Join("=", v.first, v.second); });
std::string envFile = p->taskFolder + "/environments";
int ret = System::WriteStringToFile(envFile, String::Join<'\n'>(p->environmentsBuffer));
if (ret != 0)
{
Logger::Error(p->jobId, p->taskId, p->requeueCount, "Failed to create environment file for docker task. Exitcode: {0}", ret);
goto Final;
}
}
if (disableCgroup)
{
std::string flagFile = p->taskFolder + "/disable_cgroup";
int ret = System::WriteStringToFile(flagFile, "1");
if (ret != 0)
{
Logger::Error(p->jobId, p->taskId, p->requeueCount, "Failed to create flag file to disable cgroup. Exitcode: {0}", ret);
goto Final;
}
Logger::Error(p->jobId, p->taskId, p->requeueCount, "Failed to create task environments file {0}. Exitcode: {1}", envFile, ret);
goto Final;
}
if (0 != p->ExecuteCommand("/bin/bash", "PrepareTask.sh", p->taskExecutionId, p->GetAffinity(), p->taskFolder, p->userName))

Просмотреть файл

@ -10,7 +10,7 @@ taskId=$1
processId=$2
taskFolder=$3
isDockerTask=$(CheckDockerEnvFileExist $taskFolder)
isDockerTask=$(CheckDockerImageNameNotEmpty $taskFolder)
if $isDockerTask; then
isDebugMode=$(CheckDockerDebugMode $taskFolder)
if ! $isDebugMode; then
@ -32,9 +32,9 @@ if $isDockerTask; then
exit
fi
/bin/bash ./EndTask.sh "$taskId" "$processId" "1"
/bin/bash ./EndTask.sh "$taskId" "$processId" "1" "$taskFolder"
cgDisabled=$(CheckCgroupDisabledInFlagFile $taskFolder)
cgDisabled=$(CheckDisableCgroupSet $taskFolder)
if $CGInstalled && ! $cgDisabled; then
groupName=$(GetCGroupName "$taskId")
group=$CGroupSubSys:$groupName

Просмотреть файл

@ -12,8 +12,8 @@ processId=$2
forced=$3
taskFolder=$4
isDockerTask=$(CheckDockerEnvFileExist $taskFolder)
cgDisabled=$(CheckCgroupDisabledInFlagFile $taskFolder)
isDockerTask=$(CheckDockerImageNameNotEmpty $taskFolder)
cgDisabled=$(CheckDisableCgroupSet $taskFolder)
if $CGInstalled && ! $cgDisabled; then
if $isDockerTask; then
containerId=$(GetContainerId $taskFolder)

Просмотреть файл

@ -12,7 +12,7 @@ affinity=$2
taskFolder=$3
userName=$4
isDockerTask=$(CheckDockerEnvFileExist $taskFolder)
isDockerTask=$(CheckDockerImageNameNotEmpty $taskFolder)
if $isDockerTask; then
isMpiTask=$(CheckMpiTask $taskFolder)
if $isMpiTask; then
@ -28,7 +28,7 @@ if $isDockerTask; then
dockerImage=$(GetDockerImageName $taskFolder)
volumeOption=$(GetDockerVolumeOption $taskFolder)
additionalOption=$(GetDockerAdditionalOption $taskFolder)
envFile=$(GetDockerTaskEnvFile $taskFolder)
envFile=$(GetTaskEnvFile $taskFolder)
containerIdFile=$(GetContainerIdFile $taskFolder)
dockerEngine=$(GetDockerEngine $taskFolder)
$dockerEngine run -id \
@ -70,7 +70,7 @@ if $isDockerTask; then
exit
fi
cgDisabled=$(CheckCgroupDisabledInFlagFile $taskFolder)
cgDisabled=$(CheckDisableCgroupSet $taskFolder)
if $CGInstalled && ! $cgDisabled; then
groupName=$(GetCGroupName "$taskId")
group=$CGroupSubSys:$groupName
@ -116,11 +116,11 @@ if $CGInstalled && ! $cgDisabled; then
exit $ec
fi
numaMaxIndex=$((`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'` - 1))
maxLoop=3
while [ $maxLoop -gt 0 ]
do
memsFile=$(GetMemsFile "$groupName")
numaMaxIndex=$((`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'` - 1))
echo 0-$numaMaxIndex > "$memsFile"
ec=$?
if [ $ec -eq 0 ]
@ -138,6 +138,49 @@ if $CGInstalled && ! $cgDisabled; then
exit $ec
fi
# Apply the task's memory limit to its cgroup: the value comes from
# GetMemoryLimitBytes and is written to the file returned by GetMemoryLimitFile.
# The write is attempted at most 3 times, sleeping 0.5 s after each failure.
memoryLimit=$(GetMemoryLimitBytes $taskFolder)
maxLoop=3
while [ $maxLoop -gt 0 ]
do
memoryLimitFile=$(GetMemoryLimitFile "$groupName")
echo $memoryLimit > "$memoryLimitFile"
# Capture the status of the write immediately, before another command resets $?.
ec=$?
if [ $ec -eq 0 ]
then
break
fi
echo "Failed to set memory limit for $group, error code $ec, retry after .5 seconds"
((maxLoop--))
sleep .5
done
# $ec still holds the status of the last write attempt; abort with it if no attempt succeeded.
if [ $ec -ne 0 ]
then
exit $ec
fi
# Write 0 to the cgroup's swappiness file (the file returned by
# GetMemorySwappinessFile); the log message below describes this as disabling
# memory swap. Same retry policy as above: at most 3 attempts, 0.5 s apart.
maxLoop=3
while [ $maxLoop -gt 0 ]
do
memorySwappinessFile=$(GetMemorySwappinessFile "$groupName")
echo 0 > "$memorySwappinessFile"
# Capture the status of the write immediately, before another command resets $?.
ec=$?
if [ $ec -eq 0 ]
then
break
fi
echo "Failed to disable memory swap for $group, error code $ec, retry after .5 seconds"
((maxLoop--))
sleep .5
done
# $ec still holds the status of the last write attempt; abort with it if no attempt succeeded.
if [ $ec -ne 0 ]
then
exit $ec
fi
tasks=$(GetCpusetTasksFile "$groupName")
freezerState=$(GetFreezerStateFile "$groupName")
@ -145,5 +188,4 @@ if $CGInstalled && ! $cgDisabled; then
[ ! -f "$freezerState" ] && echo "$freezerState doesn't exist" && exit 201
exit 0
fi
fi

Просмотреть файл

@ -23,7 +23,7 @@ case "$CCP_MPI_HOSTFILE_FORMAT" in
*) echo $CCP_NODES_CORES | tr ' ' '\n' | sed -n 'n;p' > $CCP_MPI_HOSTFILE;;
esac
isDockerTask=$(CheckDockerEnvFileExist $taskFolder)
isDockerTask=$(CheckDockerImageNameNotEmpty $taskFolder)
if $isDockerTask; then
containerId=$(GetContainerId $taskFolder)
docker exec $containerId /bin/bash -c "$taskFolder/TestMutualTrust.sh $taskId $taskFolder $userName" &&\
@ -31,7 +31,7 @@ if $isDockerTask; then
exit
fi
cgDisabled=$(CheckCgroupDisabledInFlagFile $taskFolder)
cgDisabled=$(CheckDisableCgroupSet $taskFolder)
if $CGInstalled && ! $cgDisabled; then
groupName=$(GetCGroupName "$taskId")
group=$CGroupSubSys:$groupName

Просмотреть файл

@ -9,7 +9,7 @@
taskId=$1
taskFolder=$2
isDockerTask=$(CheckDockerEnvFileExist $taskFolder)
isDockerTask=$(CheckDockerImageNameNotEmpty $taskFolder)
userTime10Ms=0
kernelTime10Ms=0
@ -34,7 +34,7 @@ function GetMemoryMaxusageFile
GetGroupFile "$groupName" memory memory.max_usage_in_bytes
}
cgDisabled=$(CheckCgroupDisabledInFlagFile $taskFolder)
cgDisabled=$(CheckDisableCgroupSet $taskFolder)
if $CGInstalled && ! $cgDisabled; then
if $isDockerTask; then
containerId=$(GetContainerId $taskFolder)

Просмотреть файл

@ -55,6 +55,18 @@ function GetFreezerStateFile
GetGroupFile "$groupName" freezer freezer.state
}
# Looks up the memory.limit_in_bytes file of a cgroup by delegating to
# GetGroupFile with the "memory" subsystem.
#   $1 - cgroup name
function GetMemoryLimitFile
{
    local groupName="$1"
    GetGroupFile "${groupName}" 'memory' 'memory.limit_in_bytes'
}
# Looks up the memory.swappiness file of a cgroup by delegating to
# GetGroupFile with the "memory" subsystem.
#   $1 - cgroup name
function GetMemorySwappinessFile
{
    local groupName="$1"
    GetGroupFile "${groupName}" 'memory' 'memory.swappiness'
}
MpiContainerSuffix="MPI"
DebugContainerSuffix="DEBUG"
TmpSshDir="/tmp/hpcSshKey/.ssh"
@ -85,7 +97,7 @@ function GetContainerPlaceholder
echo "$taskFolder/placeholder"
}
function GetDockerTaskEnvFile
function GetTaskEnvFile
{
local taskFolder=$1
echo "$taskFolder/environments"
@ -103,10 +115,10 @@ function GetContainerId
cat $(GetContainerIdFile $taskFolder)
}
function CheckDockerEnvFileExist
function CheckDockerImageNameNotEmpty
{
local taskFolder=$1
[ -f $(GetDockerTaskEnvFile $taskFolder) ] && echo true || echo false
[ -z $taskFolder ] || [ -z $(GetDockerImageName $taskFolder) ] && echo false || echo true
}
function GetUserSshDir
@ -127,14 +139,14 @@ function GetMpiContainerStartOption
function CheckMpiTask
{
local taskFolder=$1
local nodeNum=$(cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_NODES=" | sed -r 's/^CCP_NODES=([0-9]+) .*/\1/g')
local nodeNum=$(cat $(GetTaskEnvFile $taskFolder) | grep "CCP_NODES=" | sed -r 's/^CCP_NODES=([0-9]+) .*/\1/g')
[ "$nodeNum" -gt 1 ] && echo true || echo false
}
function CheckDockerDebugMode
{
local taskFolder=$1
debugOption=$(cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_DOCKER_DEBUG=" | cut -d '=' -f 2)
debugOption=$(cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DOCKER_DEBUG=" | cut -d '=' -f 2)
[ -z $debugOption ] || [ "$debugOption" == "0" ] && echo false || echo true
}
@ -145,7 +157,7 @@ function GetDockerEngine
echo "docker"
else
local taskFolder=$1
local nvidiaOption=$(cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_DOCKER_NVIDIA=" | cut -d '=' -f 2)
local nvidiaOption=$(cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DOCKER_NVIDIA=" | cut -d '=' -f 2)
if [ -z $nvidiaOption ] || [ "$nvidiaOption" == "0" ]; then
echo "docker"
else
@ -157,19 +169,19 @@ function GetDockerEngine
function GetDockerImageName
{
local taskFolder=$1
cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_DOCKER_IMAGE=" | cut -d '=' -f 2
cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DOCKER_IMAGE=" | cut -d '=' -f 2
}
function GetDockerVolumeOption
{
local taskFolder=$1
cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_DOCKER_VOLUMES=" | sed -e 's/^CCP_DOCKER_VOLUMES=/-v /g' -e 's/,/ -v /g'
cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DOCKER_VOLUMES=" | sed -e 's/^CCP_DOCKER_VOLUMES=/-v /g' -e 's/,/ -v /g'
}
function GetDockerAdditionalOption
{
local taskFolder=$1
cat $(GetDockerTaskEnvFile $taskFolder) | grep "CCP_DOCKER_START_OPTION=" | sed 's/^CCP_DOCKER_START_OPTION=//'
cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DOCKER_START_OPTION=" | sed 's/^CCP_DOCKER_START_OPTION=//'
}
function GetSshStartCommand
@ -184,9 +196,17 @@ function GetSshStopCommand
echo $version | grep -iq ubuntu && echo "service ssh stop" || echo "service sshd stop"
}
function CheckCgroupDisabledInFlagFile
function CheckDisableCgroupSet
{
local taskFolder=$1
local flagFile="$taskFolder/disable_cgroup"
[ -f $flagFile ] && [ "$(head $flagFile)" == "1" ] && echo true || echo false
[ -z $taskFolder ] && echo false && exit
local disableCgroupValue=$(cat $(GetTaskEnvFile $taskFolder) | grep "CCP_DISABLE_CGROUP=" | cut -d '=' -f 2)
[ "$disableCgroupValue" == "1" ] && echo true || echo false
}
# Prints the memory limit to apply to a task, read from the task's
# environment file (located with GetTaskEnvFile).
#   $1 - task folder
# Output: "<value>M" when the file contains a CCP_MAXIMUMMEMORY=<value> line
# with a non-empty value, otherwise "-1".
function GetMemoryLimitBytes
{
    local taskFolder="$1"
    local envFile
    envFile="$(GetTaskEnvFile "$taskFolder")"

    local memoryLimitMegaBytes=""
    if [ -r "$envFile" ]; then
        # Anchor to the start of the line so only the variable itself matches
        # (not another entry whose name or value merely contains
        # "CCP_MAXIMUMMEMORY="), and stop at the first hit so the result is
        # always a single line.
        memoryLimitMegaBytes="$(grep -m 1 '^CCP_MAXIMUMMEMORY=' "$envFile" | cut -d '=' -f 2)"
    fi

    # Quoted so an empty or multi-word value cannot break the test expression.
    if [ -z "$memoryLimitMegaBytes" ]; then
        echo -1
    else
        echo "${memoryLimitMegaBytes}M"
    fi
}