Added a new updated RDMA test (beta)

This commit is contained in:
Shital Savekar 2017-10-25 21:41:25 +05:30
Родитель 5915cc4c41
Коммит 4a0e0d5a51
3 изменённых файлов: 907 добавлений и 3 удалений

Просмотреть файл

@ -3585,6 +3585,218 @@
</VirtualMachine>
</HostedService>
</M1S1>
<S1C15>
<isDeployed>NO</isDeployed>
<HostedService>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>server-vm</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1110</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-1</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1111</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-2</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1112</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-3</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1113</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-4</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1114</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-5</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1115</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-6</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1116</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-7</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1117</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-8</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1118</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-9</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1119</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-10</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1120</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-11</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1121</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-12</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1122</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-13</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1123</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-14</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1124</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
<VirtualMachine>
<state></state>
<InstanceSize>Standard_D1_v2</InstanceSize>
<ARMInstanceSize>Standard_D1_v2</ARMInstanceSize>
<RoleName>client-vm-15</RoleName>
<EndPoints>
<Name>SSH</Name>
<Protocol>tcp</Protocol>
<LocalPort>22</LocalPort>
<PublicPort>1125</PublicPort>
</EndPoints>
<DataDisk></DataDisk>
</VirtualMachine>
</S1C15>
<RDMA2VMA8>
<isDeployed>NO</isDeployed>
<HostedService>
@ -7055,6 +7267,30 @@
<InitialWaitSeconds>300</InitialWaitSeconds>
<Priority>P0</Priority>
</test>
<test>
<!-- Multi-VM RDMA verification: driven by ICA-RDMA-TEST-MultiVM.ps1 on the S1C15 setup (one server-vm plus client-vm-1 to client-vm-15). -->
<testName>ICA-RDMA-TEST-MultiVM</testName>
<testScript></testScript>
<testScriptps1>ICA-RDMA-TEST-MultiVM.ps1</testScriptps1>
<setupType>S1C15</setupType>
<SupportedExecutionModes>AzureServiceManagement,AzureResourceManager</SupportedExecutionModes>
<SubtestValues>SOME,TEXTS,NEEDS,TO,BE,PRESENT,HERE,FOR,PRINTING,TEST,SUMMARY</SubtestValues>
<remoteFiles></remoteFiles>
<InitialWaitSeconds>60</InitialWaitSeconds>
<Priority>P0</Priority>
<!-- The driver script expects NumberOfReboots + 1 successful runs: the first boot and one run after every reboot. -->
<NumberOfReboots>5</NumberOfReboots>
<!-- Each param is appended verbatim to constants.sh by the driver script; the three *_iterations values also decide which IMB result checks the driver performs. -->
<params>
<param>mpi_settings="-env I_MPI_FABRICS dapl -env I_MPI_DAPL_PROVIDER=ofa-v2-ib0 -env SECS_PER_SAMPLE=600"</param>
<param>imb_mpi1_tests="pingpong allreduce"</param>
<param>mpi1_ppn=2</param>
<param>imb_mpi1_tests_iterations=10</param>
<param>imb_rma_tests="all"</param>
<param>rma_ppn=2</param>
<param>imb_rma_tests_iterations=1</param>
<param>imb_nbc_tests="all"</param>
<param>nbc_ppn=2</param>
<param>imb_nbc_tests_iterations=1</param>
</params>
</test>
<test>
<testName>ICA-RDMA-TEST-2VM-A8</testName>
<testScript></testScript>
@ -8603,9 +8839,9 @@
</test>
</Cycle>
<Cycle>
<cycleName>RDMA16VM</cycleName>
<cycleName>RDMAMultiVM</cycleName>
<test>
<Name>ICA-RDMA-TEST-16VM</Name>
<Name>ICA-RDMA-TEST-MultiVM</Name>
</test>
</Cycle>
<Cycle>

Просмотреть файл

@ -0,0 +1,362 @@
#author - v-shisav@microsoft.com
Import-Module .\TestLibs\RDFELibs.psm1 -Force
# Result holders used for the final summary of this test script.
$result = ""
$testResult = ""
$resultArr = @()
# When the test definition carries an OverrideVMSize value, publish it as a
# global variable.
$requestedVMSize = $currentTestData.OverrideVMSize
if ($requestedVMSize)
{
    $global:OverrideVMSize = $requestedVMSize
}
$isDeployed = DeployVMS -setupType $currentTestData.setupType -Distro $Distro -xmlConfig $xmlConfig
if ($isDeployed)
{
try
{
# Split the deployed VMs into the server (master) and the client machines.
# Role names are matched case-insensitively against "Server" / "Client";
# when several roles match "Server" the last one wins.
$noServer = $true
$noClient = $true
$clientMachines = @()
foreach ( $vmData in $allVMData )
{
    if ( $vmData.RoleName -imatch "Server" )
    {
        $serverVMData = $vmData
        $noServer = $false
    }
    elseif ( $vmData.RoleName -imatch "Client" )
    {
        $clientMachines += $vmData
        # Was "$fase" (an undefined variable), which only worked by accident.
        $noClient = $false
    }
}
# Comma separated list of all client role names.
$slaveHostnames = ($clientMachines | ForEach-Object { $_.RoleName }) -join ","
if ( $noServer )
{
    Throw "No any server VM defined. Be sure that, server VM role name matches with the pattern `"*server*`". Aborting Test."
}
# Was "$noSlave", which is never assigned, so a missing client was never reported.
if ( $noClient )
{
    Throw "No any client VM defined. Be sure that, client machine role names matches with pattern `"*client*`" Aborting Test."
}
if ($serverVMData.InstanceSize -imatch "Standard_NC")
{
    LogMsg "Waiting 5 minutes to finish RDMA update for NC series VMs."
    Start-Sleep -Seconds 300
}
#region CONFIGURE VMs for TEST
# Log the connection details of the server followed by every client.
LogMsg "SERVER VM details :"
LogMsg " RoleName : $($serverVMData.RoleName)"
LogMsg " Public IP : $($serverVMData.PublicIP)"
LogMsg " SSH Port : $($serverVMData.SSHPort)"
$clientNumber = 0
foreach ( $clientVMData in $clientMachines )
{
    $clientNumber++
    LogMsg "CLIENT VM #$clientNumber details :"
    LogMsg " RoleName : $($clientVMData.RoleName)"
    LogMsg " Public IP : $($clientVMData.PublicIP)"
    LogMsg " SSH Port : $($clientVMData.SSHPort)"
}
#
# ProvisionVMsForLisa enables the root user and passwordless authentication
# across all VMs in the same hosted service.
#
ProvisionVMsForLisa -allVMData $allVMData -installPackagesOnRoleNames "none"
#endregion
#region Generate constants.sh
# Every test parameter is appended verbatim to constants.sh. The three
# *_iterations parameters are additionally parsed into integers, which later
# decide whether the matching IMB result is checked.
LogMsg "Generating constansts.sh ..."
$constantsFile = ".\$LogDir\constants.sh"
foreach ($testParam in $currentTestData.params.param )
{
    Add-Content -Value "$testParam" -Path $constantsFile
    LogMsg "$testParam added to constansts.sh"
    # No "break" in the clauses: each pattern is evaluated independently.
    switch -Regex ($testParam)
    {
        "imb_mpi1_tests_iterations"
        {
            $imb_mpi1_test_iterations = [int]($testParam.Replace("imb_mpi1_tests_iterations=",""))
        }
        "imb_rma_tests_iterations"
        {
            $imb_rma_tests_iterations = [int]($testParam.Replace("imb_rma_tests_iterations=",""))
        }
        "imb_nbc_tests_iterations"
        {
            $imb_nbc_tests_iterations = [int]($testParam.Replace("imb_nbc_tests_iterations=",""))
        }
    }
}
# The remote script learns the cluster layout from these two entries.
$masterRoleName = $serverVMData.RoleName
Add-Content -Value "master=`"$masterRoleName`"" -Path $constantsFile
LogMsg "master=$masterRoleName added to constansts.sh"
Add-Content -Value "slaves=`"$slaveHostnames`"" -Path $constantsFile
LogMsg "slaves=$slaveHostnames added to constansts.sh"
LogMsg "constanst.sh created successfully..."
#endregion
#region Upload files to master VM...
# constants.sh and the test script are uploaded in a single call; the former
# second upload of constants.sh alone was redundant and has been dropped.
RemoteCopy -uploadTo $serverVMData.PublicIP -port $serverVMData.SSHPort -files "$constantsFile,.\remote-scripts\TestRDMA_MultiVM.sh" -username "root" -password $password -upload
#endregion
$out = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "chmod +x *.sh"
# NumberOfReboots comes from XML as text; convert it once so that the later
# "-gt 0" comparison and the decrement operate on a number.
$remainingRebootIterations = [int]($currentTestData.NumberOfReboots)
# One run on first boot plus one run after every reboot.
$ExpectedSuccessCount = $remainingRebootIterations + 1
$totalSuccessCount = 0
$iteration = 0
do
{
#region EXECUTE TEST
# Start the remote test script as a background job for this iteration.
$iteration += 1
LogMsg "********************************Iteration - $iteration/$ExpectedSuccessCount***********************************************"
$testJob = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "/root/TestRDMA_MultiVM.sh" -RunInBackground
#endregion
#region MONITOR TEST
# Poll the last log line every 10 seconds until the background job ends.
while ( (Get-Job -Id $testJob).State -eq "Running" )
{
    $currentStatus = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "tail -n 1 /root/TestRDMALogs.txt"
    LogMsg "Current Test Staus : $currentStatus"
    WaitFor -seconds 10
}
# Fetch all result files from the server into the log directory.
foreach ( $remoteFile in @("/root/eth1-status*", "/root/IMB-*", "/root/TestRDMALogs.txt", "/root/state.txt") )
{
    RemoteCopy -downloadFrom $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -download -downloadTo $LogDir -files $remoteFile
}
$consoleOutput = ( Get-Content -Path "$LogDir\TestRDMALogs.txt" | Out-String )
$finalStatus = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "cat /root/state.txt"
# The first run is labelled "FirstBoot", every later one "Reboot".
$tempName = "Reboot"
if ($iteration -eq 1)
{
    $tempName = "FirstBoot"
}
# Move this iteration's files into their own sub-directory.
$iterationLogDir = "$LogDir\InfiniBand-Verification-$iteration-$tempName"
mkdir -Path $iterationLogDir -Force | Out-Null
foreach ( $localFile in @("eth1-status*", "IMB-*", "TestRDMALogs.txt", "state.txt") )
{
    Move-Item -Path "$LogDir\$localFile" -Destination $iterationLogDir | Out-Null
}
#region Check verification markers written by the remote script
# The remote script logs one INFINIBAND_VERIFICATION_SUCCESS_* marker per
# completed stage. Each enabled check passes when its marker occurs exactly
# once in this iteration's TestRDMALogs.txt. The three IMB checks are only
# evaluated when the matching *_iterations test parameter is at least 1.
$logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
$verificationChecks = @(
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_ETH1";           Label = "eth1 IP";            Enabled = $true },
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTRANODE"; Label = "PingPong Intranode"; Enabled = $true },
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTERNODE"; Label = "PingPong Internode"; Enabled = $true },
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_ALLNODES";  Label = "IMB-MPI1";           Enabled = ($imb_mpi1_test_iterations -ge 1) },
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_RMA_ALLNODES";   Label = "IMB-RMA";            Enabled = ($imb_rma_tests_iterations -ge 1) },
    # The NBC check used to look for the RMA marker, so it only mirrored the RMA result.
    @{ Pattern = "INFINIBAND_VERIFICATION_SUCCESS_NBC_ALLNODES";   Label = "IMB-NBC";            Enabled = ($imb_nbc_tests_iterations -ge 1) }
)
foreach ( $check in $verificationChecks )
{
    if ( $check.Enabled )
    {
        $pattern = $check.Pattern
        LogMsg "Analysing $logFileName"
        $metaData = "InfiniBand-Verification-$iteration-$tempName : $($check.Label)"
        # @() guarantees a Count property even when exactly one line matches.
        $sucessLogs = @(Select-String -Path $logFileName -Pattern $pattern)
        if ($sucessLogs.Count -eq 1)
        {
            $currentResult = "PASS"
        }
        else
        {
            $currentResult = "FAIL"
        }
        LogMsg "$pattern : $currentResult"
        $resultArr += $currentResult
        $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
    }
}
#endregion
# Dump the console log downloaded from the server: as a normal message when
# state.txt reports TestCompleted, as an error otherwise.
if ($finalStatus -imatch "TestCompleted")
{
LogMsg "Test finished successfully."
LogMsg $consoleOutput
}
else
{
LogErr "Test failed."
LogErr $consoleOutput
}
#endregion
# Translate the state keyword read from state.txt into this iteration's
# result. Only "TestCompleted" increments $totalSuccessCount.
if ( $finalStatus -imatch "TestFailed")
{
LogErr "Test failed. Last known status : $currentStatus."
$testResult = "FAIL"
}
elseif ( $finalStatus -imatch "TestAborted")
{
LogErr "Test Aborted. Last known status : $currentStatus."
$testResult = "ABORTED"
}
elseif ( $finalStatus -imatch "TestCompleted")
{
LogMsg "Test Completed. Result : $finalStatus."
$testResult = "PASS"
$totalSuccessCount += 1
}
elseif ( $finalStatus -imatch "TestRunning")
{
# The background job has ended although the VM still reports a running test.
LogMsg "Powershell backgroud job for test is completed but VM is reporting that test is still running. Please check $LogDir\mdConsoleLogs.txt"
LogMsg "Contests of state.txt : $finalStatus"
$testResult = "FAIL"
}
LogMsg "*********************************************************************************************"
# Restart all VMs while reboot iterations remain; the restart outcome is kept
# in $RestartStatus, which the enclosing loop condition checks.
if ($remainingRebootIterations -gt 0)
{
$RestartStatus = RestartAllDeployments -allVMData $allVMData
$remainingRebootIterations -= 1
}
}
while(($ExpectedSuccessCount -ne $iteration) -and ($RestartStatus -eq "True"))
# The test passes only when every expected run (first boot plus all reboots)
# reported success.
$testResult = "FAIL"
if ( $ExpectedSuccessCount -eq $totalSuccessCount )
{
    $testResult = "PASS"
}
LogMsg "Test result : $testResult"
LogMsg "Test Completed"
}
catch
{
$ErrorMessage = $_.Exception.Message
LogMsg "EXCEPTION : $ErrorMessage"
}
Finally
{
if (!$testResult)
{
$testResult = "Aborted"
}
$resultArr += $testResult
}
}
else
{
$testResult = "Aborted"
$resultArr += $testResult
}
# Reduce all collected per-check and per-run results to one final result.
$result = GetFinalResultHeader -resultarr $resultArr
# Clean up the setup; $isDeployed is passed both as the deployed services and
# as the resource groups.
DoTestCleanUp -result $result -testName $currentTestData.testName -deployedServices $isDeployed -ResourceGroups $isDeployed
# Return the result and the summary to the test suite script.
return $result, $resultSummary

Просмотреть файл

@ -0,0 +1,306 @@
#!/bin/bash
#######################################################################
#
# Linux on Hyper-V and Azure Test Code, ver. 1.0.0
# Copyright (c) Microsoft Corporation
#
# All rights reserved.
# Licensed under the Apache License, Version 2.0 (the ""License"");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
# OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
# ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
# PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
#
# See the Apache Version 2.0 License for specific language governing
# permissions and limitations under the License.
#
#######################################################################
#######################################################################
#
#
#
# Description:
#######################################################################
# Command-line handling, adapted from
# http://stackoverflow.com/questions/4882349/parsing-shell-script-arguments
#
# ParseOptionPairs [-name value]...
#   Walks the leading "-name value" pairs of its arguments. All dashes are
#   stripped from the option to form a variable name, and the following
#   argument is stored in that variable exactly as given (no word splitting
#   and no re-evaluation by the shell, unlike the former eval). Options whose
#   name is not a valid shell identifier are reported on stderr and skipped.
#   Parsing stops at the first argument that does not start with "-".
ParseOptionPairs()
{
    local __optName
    while [[ "${1-}" == -* ]]; do
        __optName=${1//-/}
        if [[ "$__optName" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
            # printf -v assigns the value without ever executing it.
            printf -v "$__optName" '%s' "${2-}"
        else
            echo "Ignoring option with invalid name: $1" >&2
        fi
        # Two separate shifts: the second one is a harmless no-op when the
        # last option came without a value.
        shift
        shift
    done
}
ParseOptionPairs "$@"
# Keep both names defined (possibly empty) even when they were not passed.
master=$master
slaves=$slaves
# Start every run with a fresh log file.
rm -rf /root/TestRDMALogs.txt
#
# Global settings and state shared by the whole script
#
# Parameter file that is sourced further below.
CONSTANTS_FILE='/root/constants.sh'

# State keywords that UpdateTestState writes to /root/state.txt.
ICA_TESTRUNNING='TestRunning'     # The test is running
ICA_TESTCOMPLETED='TestCompleted' # The test completed successfully
ICA_TESTABORTED='TestAborted'     # Error during the setup of the test
ICA_TESTFAILED='TestFailed'       # Error occurred during the test

CurrentMachine=''

# Summed exit codes of the three IMB benchmark stages (0 means no failure).
imb_mpi1_finalStatus=0
imb_rma_finalStatus=0
imb_nbc_finalStatus=0
#######################################################################
#
# LogMsg <message>
#   Prefixes the message with the current date and time, prints the line
#   to stdout and appends the same line to /root/TestRDMALogs.txt.
#
#######################################################################
LogMsg()
{
    local stampedLine
    stampedLine="$(date "+%b %d %Y %T") : ${1}"
    echo "$stampedLine"
    echo "$stampedLine" >> /root/TestRDMALogs.txt
}
# UpdateTestState <state>
#   Overwrites /root/state.txt with the given state keyword.
UpdateTestState()
{
    local newState="${1}"
    echo "${newState}" > /root/state.txt
}
# PrepareForRDMA
#   Placeholder: no preparation is implemented yet, it only prints a notice.
PrepareForRDMA()
{
    # TODO: add the actual preparation steps.
    printf '%s\n' "Doing Nothing"
}
# Load the test parameters. Without the constants file the run is reported
# as aborted and the script exits with code 10.
if [ ! -e ${CONSTANTS_FILE} ]; then
    errMsg="Error: missing ${CONSTANTS_FILE} file"
    LogMsg "${errMsg}"
    UpdateTestState $ICA_TESTABORTED
    exit 10
fi
source ${CONSTANTS_FILE}
# FindIntelBinary <name>
#   Prints the path of the first file called <name> whose path contains
#   "intel64"; prints nothing when no such file exists.
FindIntelBinary()
{
    # Keep only the first hit: several hits in one variable would break every
    # command line built from it. Errors from unreadable directories are
    # silenced.
    find / -name "$1" 2>/dev/null | grep intel64 | head -n 1
}

# Space separated form of the comma separated slave list.
slavesArr=${slaves//,/ }
mpirunPath=$(FindIntelBinary mpirun)
LogMsg "MPIRUN Path: $mpirunPath"
imb_mpi1Path=$(FindIntelBinary IMB-MPI1)
LogMsg "IMB-MPI1 Path: $imb_mpi1Path"
imb_rmaPath=$(FindIntelBinary IMB-RMA)
LogMsg "IMB-RMA Path: $imb_rmaPath"
imb_nbcPath=$(FindIntelBinary IMB-NBC)
LogMsg "IMB-NBC Path: $imb_nbcPath"
# Verify that eth1 got an IP address on every VM of the current cluster.
# For each VM the "ifconfig eth1" output is also saved as
# eth1-status-<vm>.txt and copied into the current directory. totalVMs ends
# up holding the number of VMs visited (master plus slaves).
finaleth1Status=0
totalVMs=0
slavesArr=${slaves//,/ }
for vm in $master $slavesArr
do
    LogMsg "Checking eth1 status in $vm"
    # Only the exit code matters: grep fails when there is no "inet " line.
    ssh "root@${vm}" "ifconfig eth1 | grep 'inet '" > /dev/null
    eth1Status=$?
    ssh "root@${vm}" "ifconfig eth1 > eth1-status-${vm}.txt"
    scp "root@${vm}:eth1-status-${vm}.txt" .
    if [ "$eth1Status" -eq 0 ]; then
        LogMsg "eth1 IP detected for ${vm}."
    else
        LogMsg "Error: eth1 failed to get IP address for ${vm}."
    fi
    finaleth1Status=$(( finaleth1Status + eth1Status ))
    totalVMs=$(( totalVMs + 1 ))
done
if [ "$finaleth1Status" -ne 0 ]; then
    # The message used to read "did get", the opposite of what happened.
    LogMsg "ERROR: Some VMs did not get IP address for eth1. Aborting Tests"
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_ETH1"
    # Exit code stays 0; the outcome is reported through state.txt and the
    # marker lines in the log.
    exit 0
else
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_ETH1"
fi
##Verify MPI Tests
# PingPong test (IntraNode): on every VM, run IMB-MPI1 pingpong with
# $mpi1_ppn processes on that VM alone. The output is written on the VM and
# then copied into the current directory.
finalMpiIntranodeStatus=0
slavesArr=${slaves//,/ }
for vm in $master $slavesArr
do
    LogMsg "$mpirunPath -hosts $vm -ppn $mpi1_ppn -n $mpi1_ppn $mpi_settings $imb_mpi1Path pingpong"
    LogMsg "Checking IMB-MPI1 Intranode status in $vm"
    ssh "root@${vm}" "$mpirunPath -hosts $vm -ppn $mpi1_ppn -n $mpi1_ppn $mpi_settings $imb_mpi1Path pingpong > IMB-MPI1-IntraNode-pingpong-output-$vm.txt"
    mpiIntranodeStatus=$?
    scp "root@${vm}:IMB-MPI1-IntraNode-pingpong-output-$vm.txt" .
    # Judge this VM by the benchmark's own exit code (previously the stale
    # eth1 status was tested, so a failure was logged as success).
    if [ "$mpiIntranodeStatus" -eq 0 ]; then
        LogMsg "IMB-MPI1 Intranode status in $vm - Succeeded."
    else
        LogMsg "IMB-MPI1 Intranode status in $vm - Failed"
    fi
    finalMpiIntranodeStatus=$(( finalMpiIntranodeStatus + mpiIntranodeStatus ))
done
if [ "$finalMpiIntranodeStatus" -ne 0 ]; then
    LogMsg "ERROR: IMB-MPI1 Intranode test failed in somes VMs. Aborting further tests."
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_INTRANODE"
    exit 0
else
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTRANODE"
fi
# PingPong test (InterNode): run IMB-MPI1 pingpong between the master and
# each slave in turn, with $mpi1_ppn processes per node. The output is
# written to the current directory on the master.
finalMpiInternodeStatus=0
slavesArr=${slaves//,/ }
for vm in $slavesArr
do
    LogMsg "$mpirunPath -hosts $master,$vm -ppn $mpi1_ppn -n $(( $mpi1_ppn * 2 )) $mpi_settings $imb_mpi1Path pingpong"
    LogMsg "Checking IMB-MPI1 InterNode status in $vm"
    $mpirunPath -hosts $master,$vm -ppn $mpi1_ppn -n $(( $mpi1_ppn * 2 )) $mpi_settings $imb_mpi1Path pingpong > IMB-MPI1-InterNode-pingpong-output-${master}-${vm}.txt
    mpiInternodeStatus=$?
    # Judge this pair by the benchmark's own exit code (previously the stale
    # eth1 status was tested, so a failure was logged as success).
    if [ "$mpiInternodeStatus" -eq 0 ]; then
        LogMsg "IMB-MPI1 Internode status in $vm - Succeeded."
    else
        LogMsg "IMB-MPI1 Internode status in $vm - Failed"
    fi
    finalMpiInternodeStatus=$(( finalMpiInternodeStatus + mpiInternodeStatus ))
done
if [ "$finalMpiInternodeStatus" -ne 0 ]; then
    LogMsg "ERROR: IMB-MPI1 Internode test failed in somes VMs. Aborting further tests."
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_INTERNODE"
    exit 0
else
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTERNODE"
fi
# IMB-MPI1 across the master and all slaves, repeated
# $imb_mpi1_tests_iterations times. A test selection of "all" runs the
# benchmark without a test list; any other value is passed through as the
# list of tests. Non-zero exit codes are summed in imb_mpi1_finalStatus.
Attempts=$(seq 1 1 $imb_mpi1_tests_iterations)
imb_mpi1_finalStatus=0
for i in $Attempts
do
    case "$imb_mpi1_tests" in
        all)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path"
            LogMsg "IMB-MPI1 test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path > IMB-MPI1-AllNodes-output-Attempt-${i}.txt
            mpiStatus=$?
            ;;
        *)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path $imb_mpi1_tests"
            LogMsg "IMB-MPI1 test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path $imb_mpi1_tests > IMB-MPI1-AllNodes-output-Attempt-${i}.txt
            mpiStatus=$?
            ;;
    esac
    if [ $mpiStatus -ne 0 ]; then
        LogMsg "IMB-MPI1 test iteration $i - Failed."
        imb_mpi1_finalStatus=$(( $imb_mpi1_finalStatus + $mpiStatus ))
    else
        LogMsg "IMB-MPI1 test iteration $i - Succeeded."
    fi
    sleep 1
done
if [ $imb_mpi1_finalStatus -eq 0 ]; then
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_ALLNODES"
else
    LogMsg "ERROR: IMB-MPI1 tests returned non-zero exit code."
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_ALLNODES"
    exit 0
fi
# IMB-RMA across the master and all slaves, repeated
# $imb_rma_tests_iterations times. A test selection of "all" runs the
# benchmark without a test list; any other value is passed through as the
# list of tests. Non-zero exit codes are summed in imb_rma_finalStatus.
Attempts=$(seq 1 1 $imb_rma_tests_iterations)
imb_rma_finalStatus=0
for i in $Attempts
do
    case "$imb_rma_tests" in
        all)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath"
            LogMsg "IMB-RMA test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath > IMB-RMA-AllNodes-output-Attempt-${i}.txt
            rmaStatus=$?
            ;;
        *)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath $imb_rma_tests"
            LogMsg "IMB-RMA test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath $imb_rma_tests > IMB-RMA-AllNodes-output-Attempt-${i}.txt
            rmaStatus=$?
            ;;
    esac
    if [ $rmaStatus -ne 0 ]; then
        LogMsg "IMB-RMA test iteration $i - Failed."
        imb_rma_finalStatus=$(( $imb_rma_finalStatus + $rmaStatus ))
    else
        LogMsg "IMB-RMA test iteration $i - Succeeded."
    fi
    sleep 1
done
if [ $imb_rma_finalStatus -eq 0 ]; then
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_RMA_ALLNODES"
else
    LogMsg "ERROR: IMB-RMA tests returned non-zero exit code. Aborting further tests."
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_RMA_ALLNODES"
    exit 0
fi
# IMB-NBC across the master and all slaves, repeated
# $imb_nbc_tests_iterations times. A test selection of "all" runs the
# benchmark without a test list; any other value is passed through as the
# list of tests. Non-zero exit codes are summed in imb_nbc_finalStatus.
Attempts=$(seq 1 1 $imb_nbc_tests_iterations)
imb_nbc_finalStatus=0
for i in $Attempts
do
    case "$imb_nbc_tests" in
        all)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath"
            LogMsg "IMB-NBC test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath > IMB-NBC-AllNodes-output-Attempt-${i}.txt
            nbcStatus=$?
            ;;
        *)
            LogMsg "$mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath $imb_nbc_tests"
            LogMsg "IMB-NBC test iteration $i - Running."
            $mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath $imb_nbc_tests > IMB-NBC-AllNodes-output-Attempt-${i}.txt
            nbcStatus=$?
            ;;
    esac
    if [ "$nbcStatus" -eq 0 ]; then
        LogMsg "IMB-NBC test iteration $i - Succeeded."
    else
        LogMsg "IMB-NBC test iteration $i - Failed."
        imb_nbc_finalStatus=$(( imb_nbc_finalStatus + nbcStatus ))
    fi
    sleep 1
done
if [ "$imb_nbc_finalStatus" -ne 0 ]; then
    # The message used to blame IMB-RMA (copied from the RMA stage).
    LogMsg "ERROR: IMB-NBC tests returned non-zero exit code. Aborting further tests."
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED_NBC_ALLNODES"
    exit 0
else
    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_NBC_ALLNODES"
fi
# Overall verdict: the sum of all stage results must be zero. The eth1 part
# uses the status accumulated over every VM (finaleth1Status), not the status
# of the last VM that happened to be checked.
finalStatus=$(( finaleth1Status + finalMpiIntranodeStatus + finalMpiInternodeStatus + imb_mpi1_finalStatus + imb_rma_finalStatus + imb_nbc_finalStatus ))
if [ "$finalStatus" -ne 0 ]; then
    # Log the per-stage breakdown (the doubled "LogMsg LogMsg" used to log only
    # the literal word "LogMsg", and the MPI1 value was read from a misspelled
    # variable).
    LogMsg "finaleth1Status: $finaleth1Status, finalMpiIntranodeStatus:$finalMpiIntranodeStatus, finalMpiInternodeStatus:$finalMpiInternodeStatus, imb_mpi1_finalStatus:$imb_mpi1_finalStatus, imb_rma_finalStatus:$imb_rma_finalStatus, imb_nbc_finalStatus:$imb_nbc_finalStatus"
    UpdateTestState $ICA_TESTFAILED
    LogMsg "INFINIBAND_VERIFICATION_FAILED"
else
    UpdateTestState $ICA_TESTCOMPLETED
    LogMsg "INFINIBAND_VERIFIED_SUCCESSFULLY"
fi