diff --git a/Azure_ICA_all.xml b/Azure_ICA_all.xml index b4057e1..d7d2797 100644 --- a/Azure_ICA_all.xml +++ b/Azure_ICA_all.xml @@ -3584,7 +3584,219 @@ - + + + NO + + + + Standard_D1_v2 + Standard_D1_v2 + server-vm + + SSH + tcp + 22 + 1110 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-1 + + SSH + tcp + 22 + 1111 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-2 + + SSH + tcp + 22 + 1112 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-3 + + SSH + tcp + 22 + 1113 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-4 + + SSH + tcp + 22 + 1114 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-5 + + SSH + tcp + 22 + 1115 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-6 + + SSH + tcp + 22 + 1116 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-7 + + SSH + tcp + 22 + 1117 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-8 + + SSH + tcp + 22 + 1118 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-9 + + SSH + tcp + 22 + 1119 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-10 + + SSH + tcp + 22 + 1120 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-11 + + SSH + tcp + 22 + 1121 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-12 + + SSH + tcp + 22 + 1122 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-13 + + SSH + tcp + 22 + 1123 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-14 + + SSH + tcp + 22 + 1124 + + + + + + Standard_D1_v2 + Standard_D1_v2 + client-vm-15 + + SSH + tcp + 22 + 1125 + + + + NO @@ -7055,6 +7267,30 @@ 300 P0 + + ICA-RDMA-TEST-MultiVM + + ICA-RDMA-TEST-MultiVM.ps1 + S1C15 + AzureServiceManagement,AzureResourceManager + SOME,TEXTS,NEEDS,TO,BE,PRESENT,HERE,FOR,PRINTING,TEST,SUMMARY + + 60 + P0 + 5 + + mpi_settings="-env I_MPI_FABRICS dapl -env I_MPI_DAPL_PROVIDER=ofa-v2-ib0 -env SECS_PER_SAMPLE=600" + imb_mpi1_tests="pingpong allreduce" + mpi1_ppn=2 + imb_mpi1_tests_iterations=10 + imb_rma_tests="all" + rma_ppn=2 + imb_rma_tests_iterations=1 + 
imb_nbc_tests="all" + nbc_ppn=2 + imb_nbc_tests_iterations=1 + + ICA-RDMA-TEST-2VM-A8 @@ -8603,9 +8839,9 @@ - RDMA16VM + RDMAMultiVM - ICA-RDMA-TEST-16VM + ICA-RDMA-TEST-MultiVM
diff --git a/remote-scripts/ICA-RDMA-TEST-MultiVM.ps1 b/remote-scripts/ICA-RDMA-TEST-MultiVM.ps1
new file mode 100644
index 0000000..d9b948c
--- /dev/null
+++ b/remote-scripts/ICA-RDMA-TEST-MultiVM.ps1
@@ -0,0 +1,371 @@
+#author - v-shisav@microsoft.com
+#
+# ICA-RDMA-TEST-MultiVM.ps1
+# Deploys the VMs for $currentTestData.setupType, treats the role matching
+# "Server" as master and every role matching "Client" as a slave, writes
+# constants.sh from the test params, runs /root/TestRDMA_MultiVM.sh on the
+# master (up to NumberOfReboots + 1 times) and derives PASS/FAIL entries
+# from the INFINIBAND_VERIFICATION_SUCCESS_* markers in TestRDMALogs.txt.
+# Returns: $result, $resultSummary
+#
+Import-Module .\TestLibs\RDFELibs.psm1 -Force
+$result = ""
+$testResult = ""
+$resultArr = @()
+if ($currentTestData.OverrideVMSize)
+{
+    Set-Variable -Name OverrideVMSize -Value $currentTestData.OverrideVMSize -Scope Global
+}
+$isDeployed = DeployVMS -setupType $currentTestData.setupType -Distro $Distro -xmlConfig $xmlConfig
+if ($isDeployed)
+{
+    try
+    {
+        $noServer = $true
+        $noClient = $true
+        $clientMachines = @()
+        $slaveHostnames = "" # comma separated client role names, written to constants.sh as "slaves"
+        foreach ( $vmData in $allVMData )
+        {
+            if ( $vmData.RoleName -imatch "Server" )
+            {
+                $serverVMData = $vmData
+                $noServer = $false
+
+            }
+            elseif ( $vmData.RoleName -imatch "Client" )
+            {
+                $clientMachines += $vmData
+                $noClient = $false # was "$fase" (undefined variable)
+                if ( $slaveHostnames )
+                {
+                    $slaveHostnames += "," + $vmData.RoleName
+                }
+                else
+                {
+                    $slaveHostnames = $vmData.RoleName
+                }
+            }
+        }
+        if ( $noServer )
+        {
+            Throw "No any server VM defined. Be sure that, server VM role name matches with the pattern `"*server*`". Aborting Test."
+        }
+        if ( $noClient ) # was "$noSlave", which is never assigned, so this guard could not fire
+        {
+            Throw "No any client VM defined. Be sure that, client machine role names matches with pattern `"*client*`" Aborting Test."
+        }
+        if ($serverVMData.InstanceSize -imatch "Standard_NC")
+        {
+            LogMsg "Waiting 5 minutes to finish RDMA update for NC series VMs."
+            Start-Sleep -Seconds 300
+        }
+        #region CONFIGURE VMs for TEST
+
+        LogMsg "SERVER VM details :"
+        LogMsg " RoleName : $($serverVMData.RoleName)"
+        LogMsg " Public IP : $($serverVMData.PublicIP)"
+        LogMsg " SSH Port : $($serverVMData.SSHPort)"
+        $i = 1
+        foreach ( $clientVMData in $clientMachines )
+        {
+            LogMsg "CLIENT VM #$i details :"
+            LogMsg " RoleName : $($clientVMData.RoleName)"
+            LogMsg " Public IP : $($clientVMData.PublicIP)"
+            LogMsg " SSH Port : $($clientVMData.SSHPort)"
+            $i += 1
+        }
+        #
+        # PROVISION VMS FOR LISA WILL ENABLE ROOT USER AND WILL MAKE ENABLE PASSWORDLESS AUTHENTICATION ACROSS ALL VMS IN SAME HOSTED SERVICE.
+        #
+
+        ProvisionVMsForLisa -allVMData $allVMData -installPackagesOnRoleNames "none"
+
+        #endregion
+
+        #region Generate constants.sh
+
+        LogMsg "Generating constansts.sh ..."
+        $constantsFile = ".\$LogDir\constants.sh"
+        foreach ($testParam in $currentTestData.params.param )
+        {
+            Add-Content -Value "$testParam" -Path $constantsFile
+            LogMsg "$testParam added to constansts.sh"
+            # Remember the iteration counts; they decide below which IMB result checks are performed.
+            if ($testParam -imatch "imb_mpi1_tests_iterations")
+            {
+                $imb_mpi1_test_iterations = [int]($testParam.Replace("imb_mpi1_tests_iterations=",""))
+            }
+            if ($testParam -imatch "imb_rma_tests_iterations")
+            {
+                $imb_rma_tests_iterations = [int]($testParam.Replace("imb_rma_tests_iterations=",""))
+            }
+            if ($testParam -imatch "imb_nbc_tests_iterations")
+            {
+                $imb_nbc_tests_iterations = [int]($testParam.Replace("imb_nbc_tests_iterations=",""))
+            }
+        }
+
+        Add-Content -Value "master=`"$($serverVMData.RoleName)`"" -Path $constantsFile
+        LogMsg "master=$($serverVMData.RoleName) added to constansts.sh"
+
+
+        Add-Content -Value "slaves=`"$slaveHostnames`"" -Path $constantsFile
+        LogMsg "slaves=$slaveHostnames added to constansts.sh"
+
+        LogMsg "constanst.sh created successfully..."
+        #endregion
+
+        #region Upload files to master VM...
+        RemoteCopy -uploadTo $serverVMData.PublicIP -port $serverVMData.SSHPort -files "$constantsFile,.\remote-scripts\TestRDMA_MultiVM.sh" -username "root" -password $password -upload
+        #endregion
+
+        RemoteCopy -uploadTo $serverVMData.PublicIP -port $serverVMData.SSHPort -files "$constantsFile" -username "root" -password $password -upload # NOTE(review): constants file was already uploaded above; looks redundant
+        $out = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "chmod +x *.sh"
+        $remainingRebootIterations = $currentTestData.NumberOfReboots
+        $ExpectedSuccessCount = [int]($currentTestData.NumberOfReboots) + 1 # first boot + one run after each reboot
+        $totalSuccessCount = 0
+        $iteration = 0
+        do
+        {
+
+            #region EXECUTE TEST
+            $iteration += 1
+            LogMsg "********************************Iteration - $iteration/$ExpectedSuccessCount***********************************************"
+            $testJob = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "/root/TestRDMA_MultiVM.sh" -RunInBackground
+            #endregion
+
+            #region MONITOR TEST
+            while ( (Get-Job -Id $testJob).State -eq "Running" )
+            {
+                $currentStatus = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "tail -n 1 /root/TestRDMALogs.txt"
+                LogMsg "Current Test Staus : $currentStatus"
+                WaitFor -seconds 10
+            }
+
+            RemoteCopy -downloadFrom $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -download -downloadTo $LogDir -files "/root/eth1-status*"
+            RemoteCopy -downloadFrom $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -download -downloadTo $LogDir -files "/root/IMB-*"
+            RemoteCopy -downloadFrom $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -download -downloadTo $LogDir -files "/root/TestRDMALogs.txt"
+            RemoteCopy -downloadFrom $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -download -downloadTo $LogDir -files "/root/state.txt"
+            $consoleOutput = ( Get-Content -Path "$LogDir\TestRDMALogs.txt" | Out-String )
+            $finalStatus = RunLinuxCmd -ip $serverVMData.PublicIP -port $serverVMData.SSHPort -username "root" -password $password -command "cat /root/state.txt"
+            if($iteration -eq 1)
+            {
+                $tempName = "FirstBoot"
+            }
+            else
+            {
+                $tempName = "Reboot"
+            }
+            $out = mkdir -Path "$LogDir\InfiniBand-Verification-$iteration-$tempName" -Force | Out-Null
+            $out = Move-Item -Path "$LogDir\eth1-status*" -Destination "$LogDir\InfiniBand-Verification-$iteration-$tempName" | Out-Null
+            $out = Move-Item -Path "$LogDir\IMB-*" -Destination "$LogDir\InfiniBand-Verification-$iteration-$tempName" | Out-Null
+            $out = Move-Item -Path "$LogDir\TestRDMALogs.txt" -Destination "$LogDir\InfiniBand-Verification-$iteration-$tempName" | Out-Null
+            $out = Move-Item -Path "$LogDir\state.txt" -Destination "$LogDir\InfiniBand-Verification-$iteration-$tempName" | Out-Null
+
+            #region Check if eth1 got IP address
+            $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+            $pattern = "INFINIBAND_VERIFICATION_SUCCESS_ETH1"
+            LogMsg "Analysing $logFileName"
+            $metaData = "InfiniBand-Verification-$iteration-$tempName : eth1 IP"
+            $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+            if ($sucessLogs.Count -eq 1)
+            {
+                $currentResult = "PASS"
+            }
+            else
+            {
+                $currentResult = "FAIL"
+            }
+            LogMsg "$pattern : $currentResult"
+            $resultArr += $currentResult
+            $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            #endregion
+
+            #region Check MPI pingpong intranode tests
+            $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+            $pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTRANODE"
+            LogMsg "Analysing $logFileName"
+            $metaData = "InfiniBand-Verification-$iteration-$tempName : PingPong Intranode"
+            $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+            if ($sucessLogs.Count -eq 1)
+            {
+                $currentResult = "PASS"
+            }
+            else
+            {
+                $currentResult = "FAIL"
+            }
+            LogMsg "$pattern : $currentResult"
+            $resultArr += $currentResult
+            $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            #endregion
+
+            #region Check MPI pingpong internode tests
+            $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+            $pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTERNODE"
+            LogMsg "Analysing $logFileName"
+            $metaData = "InfiniBand-Verification-$iteration-$tempName : PingPong Internode"
+            $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+            if ($sucessLogs.Count -eq 1)
+            {
+                $currentResult = "PASS"
+            }
+            else
+            {
+                $currentResult = "FAIL"
+            }
+            LogMsg "$pattern : $currentResult"
+            $resultArr += $currentResult
+            $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            #endregion
+
+            #region Check MPI1 all nodes tests
+            if ( $imb_mpi1_test_iterations -ge 1)
+            {
+                $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+                $pattern = "INFINIBAND_VERIFICATION_SUCCESS_MPI1_ALLNODES"
+                LogMsg "Analysing $logFileName"
+                $metaData = "InfiniBand-Verification-$iteration-$tempName : IMB-MPI1"
+                $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+                if ($sucessLogs.Count -eq 1)
+                {
+                    $currentResult = "PASS"
+                }
+                else
+                {
+                    $currentResult = "FAIL"
+                }
+                LogMsg "$pattern : $currentResult"
+                $resultArr += $currentResult
+                $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            }
+            #endregion
+
+            #region Check RMA all nodes tests
+            if ( $imb_rma_tests_iterations -ge 1)
+            {
+                $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+                $pattern = "INFINIBAND_VERIFICATION_SUCCESS_RMA_ALLNODES"
+                LogMsg "Analysing $logFileName"
+                $metaData = "InfiniBand-Verification-$iteration-$tempName : IMB-RMA"
+                $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+                if ($sucessLogs.Count -eq 1)
+                {
+                    $currentResult = "PASS"
+                }
+                else
+                {
+                    $currentResult = "FAIL"
+                }
+                LogMsg "$pattern : $currentResult"
+                $resultArr += $currentResult
+                $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            }
+            #endregion
+
+            #region Check NBC all nodes tests
+            if ( $imb_nbc_tests_iterations -ge 1)
+            {
+                $logFileName = "$LogDir\InfiniBand-Verification-$iteration-$tempName\TestRDMALogs.txt"
+                $pattern = "INFINIBAND_VERIFICATION_SUCCESS_NBC_ALLNODES" # was the RMA marker; TestRDMA_MultiVM.sh logs ..._NBC_ALLNODES after the IMB-NBC runs
+                LogMsg "Analysing $logFileName"
+                $metaData = "InfiniBand-Verification-$iteration-$tempName : IMB-NBC"
+                $sucessLogs = Select-String -Path $logFileName -Pattern $pattern
+                if ($sucessLogs.Count -eq 1)
+                {
+                    $currentResult = "PASS"
+                }
+                else
+                {
+                    $currentResult = "FAIL"
+                }
+                LogMsg "$pattern : $currentResult"
+                $resultArr += $currentResult
+                $resultSummary += CreateResultSummary -testResult $currentResult -metaData $metaData -checkValues "PASS,FAIL,ABORTED" -testName $currentTestData.testName
+            }
+            #endregion
+
+            if ($finalStatus -imatch "TestCompleted")
+            {
+                LogMsg "Test finished successfully."
+                LogMsg $consoleOutput
+            }
+            else
+            {
+                LogErr "Test failed."
+                LogErr $consoleOutput
+            }
+            #endregion
+
+            # Map the contents of /root/state.txt to the result of this iteration.
+            if ( $finalStatus -imatch "TestFailed")
+            {
+                LogErr "Test failed. Last known status : $currentStatus."
+                $testResult = "FAIL"
+            }
+            elseif ( $finalStatus -imatch "TestAborted")
+            {
+                LogErr "Test Aborted. Last known status : $currentStatus."
+                $testResult = "ABORTED"
+            }
+            elseif ( $finalStatus -imatch "TestCompleted")
+            {
+                LogMsg "Test Completed. Result : $finalStatus."
+                $testResult = "PASS"
+                $totalSuccessCount += 1
+            }
+            elseif ( $finalStatus -imatch "TestRunning")
+            {
+                LogMsg "Powershell backgroud job for test is completed but VM is reporting that test is still running. Please check $LogDir\mdConsoleLogs.txt"
+                LogMsg "Contests of state.txt : $finalStatus"
+                $testResult = "FAIL"
+            }
+            LogMsg "*********************************************************************************************"
+            if ($remainingRebootIterations -gt 0)
+            {
+                $RestartStatus = RestartAllDeployments -allVMData $allVMData
+                $remainingRebootIterations -= 1
+            }
+
+        }
+        while(($ExpectedSuccessCount -ne $iteration) -and ($RestartStatus -eq "True"))
+
+        # The overall result is PASS only when every expected iteration reported TestCompleted.
+        if ( $ExpectedSuccessCount -eq $totalSuccessCount )
+        {
+            $testResult = "PASS"
+        }
+        else
+        {
+            $testResult = "FAIL"
+        }
+        LogMsg "Test result : $testResult"
+        LogMsg "Test Completed"
+
+    }
+    catch
+    {
+        $ErrorMessage = $_.Exception.Message
+        LogMsg "EXCEPTION : $ErrorMessage"
+    }
+    Finally
+    {
+        if (!$testResult)
+        {
+            $testResult = "Aborted"
+        }
+        $resultArr += $testResult
+    }
+}
+
+else
+{
+    $testResult = "Aborted"
+    $resultArr += $testResult
+}
+
+$result = GetFinalResultHeader -resultarr $resultArr
+
+#Clean up the setup
+DoTestCleanUp -result $result -testName $currentTestData.testName -deployedServices $isDeployed -ResourceGroups $isDeployed
+
+#Return the result and summery to the test suite script..
+return $result, $resultSummary
diff --git a/remote-scripts/TestRDMA_MultiVM.sh b/remote-scripts/TestRDMA_MultiVM.sh
new file mode 100644
index 0000000..cde1d10
--- /dev/null
+++ b/remote-scripts/TestRDMA_MultiVM.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+
+#######################################################################
+#
+# Linux on Hyper-V and Azure Test Code, ver. 1.0.0
+# Copyright (c) Microsoft Corporation
+#
+# All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
+# OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
+# ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
+# PURPOSE, MERCHANTABLITY OR NON-INFRINGEMENT.
+#
+# See the Apache Version 2.0 License for specific language governing
+# permissions and limitations under the License.
+#
+#######################################################################
+
+#######################################################################
+# TestRDMA_MultiVM.sh
+# Reads master, slaves and the IMB/mpirun settings from /root/constants.sh,
+# logs to /root/TestRDMALogs.txt and writes the final state to /root/state.txt.
+# Description: checks eth1 on every VM, then MPI pingpong and IMB-MPI1/RMA/NBC.
+#######################################################################
+
+#HOW TO PARSE THE ARGUMENTS.. SOURCE - http://stackoverflow.com/questions/4882349/parsing-shell-script-arguments (NOTE: evals "-name value" pairs; only pass trusted arguments)
+
+while echo $1 | grep ^- > /dev/null; do
+    eval $( echo $1 | sed 's/-//g' | tr -d '\012')=$2
+    shift
+    shift
+done
+
+master=$master
+slaves=$slaves
+rm -rf /root/TestRDMALogs.txt
+#
+# Constants/Globals
+#
+CONSTANTS_FILE="/root/constants.sh"
+ICA_TESTRUNNING="TestRunning" # The test is running
+ICA_TESTCOMPLETED="TestCompleted" # The test completed successfully
+ICA_TESTABORTED="TestAborted" # Error during the setup of the test
+ICA_TESTFAILED="TestFailed" # Error occurred during the test
+CurrentMachine=""
+imb_mpi1_finalStatus=0
+imb_rma_finalStatus=0
+imb_nbc_finalStatus=0
+#######################################################################
+# LogMsg()
+#   Echo "$1" with a timestamp prefix and append the same line to
+#   /root/TestRDMALogs.txt.
+#######################################################################
+LogMsg()
+{
+    timeStamp=$(date "+%b %d %Y %T")
+    echo "$timeStamp : ${1}" # Add the time stamp to the log message
+    echo "$timeStamp : ${1}" >> /root/TestRDMALogs.txt
+}
+
+UpdateTestState() # overwrite /root/state.txt with "$1"
+{
+    echo "${1}" > /root/state.txt
+}
+
+PrepareForRDMA() # placeholder, not called anywhere in this script
+{
+    # TODO
+    echo Doing Nothing
+}
+
+
+if [ -e "${CONSTANTS_FILE}" ]; then
+    source "${CONSTANTS_FILE}"
+else
+    errMsg="Error: missing ${CONSTANTS_FILE} file"
+    LogMsg "${errMsg}"
+    UpdateTestState "$ICA_TESTABORTED"
+    exit 10
+fi
+
+slavesArr=$(echo "${slaves}" | tr ',' ' ')
+mpirunPath=$(find / -name mpirun | grep intel64)
+LogMsg "MPIRUN Path: $mpirunPath"
+imb_mpi1Path=$(find / -name IMB-MPI1 | grep intel64)
+LogMsg "IMB-MPI1 Path: $imb_mpi1Path"
+imb_rmaPath=$(find / -name IMB-RMA | grep intel64)
+LogMsg "IMB-RMA Path: $imb_rmaPath"
+imb_nbcPath=$(find / -name IMB-NBC | grep intel64)
+LogMsg "IMB-NBC Path: $imb_nbcPath"
+
+#Verify if eth1 got IP address on All VMs in current cluster.
+finaleth1Status=0 # sum of the per-VM grep exit codes; 0 means every VM has an eth1 address
+totalVMs=0 # counted here and reused for the "-n" process count of the all-nodes runs
+slavesArr=$(echo "${slaves}" | tr ',' ' ')
+for vm in $master $slavesArr
+do
+    LogMsg "Checking eth1 status in $vm"
+    temp=$(ssh "root@${vm}" "ifconfig eth1 | grep 'inet '")
+    eth1Status=$?
+    ssh "root@${vm}" "ifconfig eth1 > eth1-status-${vm}.txt"
+    scp "root@${vm}:eth1-status-${vm}.txt" .
+    if [ "$eth1Status" -eq 0 ];
+    then
+        LogMsg "eth1 IP detected for ${vm}."
+    else
+        LogMsg "Error: eth1 failed to get IP address for ${vm}."
+    fi
+    finaleth1Status=$(( $finaleth1Status + $eth1Status ))
+    totalVMs=$(( $totalVMs + 1 ))
+done
+if [ "$finaleth1Status" -ne 0 ]; then
+    LogMsg "ERROR: Some VMs did not get IP address for eth1. Aborting Tests"
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_ETH1"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_ETH1"
+fi
+
+
+
+##Verify MPI Tests
+
+#Verify PingPong Tests (IntraNode).
+finalMpiIntranodeStatus=0
+slavesArr=$(echo "${slaves}" | tr ',' ' ')
+for vm in $master $slavesArr
+do
+    LogMsg "$mpirunPath -hosts $vm -ppn $mpi1_ppn -n $mpi1_ppn $mpi_settings $imb_mpi1Path pingpong"
+    LogMsg "Checking IMB-MPI1 Intranode status in $vm"
+    ssh "root@${vm}" "$mpirunPath -hosts $vm -ppn $mpi1_ppn -n $mpi1_ppn $mpi_settings $imb_mpi1Path pingpong > IMB-MPI1-IntraNode-pingpong-output-$vm.txt"
+    mpiIntranodeStatus=$?
+    scp "root@${vm}:IMB-MPI1-IntraNode-pingpong-output-$vm.txt" .
+    if [ "$mpiIntranodeStatus" -eq 0 ]; # was $eth1Status, which is stale from the eth1 loop
+    then
+        LogMsg "IMB-MPI1 Intranode status in $vm - Succeeded."
+    else
+        LogMsg "IMB-MPI1 Intranode status in $vm - Failed"
+    fi
+    finalMpiIntranodeStatus=$(( $finalMpiIntranodeStatus + $mpiIntranodeStatus ))
+done
+
+if [ "$finalMpiIntranodeStatus" -ne 0 ]; then
+    LogMsg "ERROR: IMB-MPI1 Intranode test failed in somes VMs. Aborting further tests."
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_INTRANODE"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTRANODE"
+fi
+
+#Verify PingPong Tests (InterNode).
+finalMpiInternodeStatus=0
+slavesArr=$(echo "${slaves}" | tr ',' ' ')
+for vm in $slavesArr
+do
+    LogMsg "$mpirunPath -hosts $master,$vm -ppn $mpi1_ppn -n $(( $mpi1_ppn * 2 )) $mpi_settings $imb_mpi1Path pingpong"
+    LogMsg "Checking IMB-MPI1 InterNode status in $vm"
+    $mpirunPath -hosts $master,$vm -ppn $mpi1_ppn -n $(( $mpi1_ppn * 2 )) $mpi_settings $imb_mpi1Path pingpong > IMB-MPI1-InterNode-pingpong-output-${master}-${vm}.txt
+    mpiInternodeStatus=$?
+    if [ "$mpiInternodeStatus" -eq 0 ]; # was $eth1Status, which is stale from the eth1 loop
+    then
+        LogMsg "IMB-MPI1 Internode status in $vm - Succeeded."
+    else
+        LogMsg "IMB-MPI1 Internode status in $vm - Failed"
+    fi
+    finalMpiInternodeStatus=$(( $finalMpiInternodeStatus + $mpiInternodeStatus ))
+done
+
+if [ "$finalMpiInternodeStatus" -ne 0 ]; then
+    LogMsg "ERROR: IMB-MPI1 Internode test failed in somes VMs. Aborting further tests."
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_INTERNODE"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_INTERNODE"
+fi
+
+#Verify IMB-MPI1 (pingpong & allreduce etc) tests.
+Attempts=$(seq 1 1 $imb_mpi1_tests_iterations)
+imb_mpi1_finalStatus=0
+for i in $Attempts;
+do
+    if [[ $imb_mpi1_tests == "all" ]]; # "all" runs the binary without a benchmark list
+    then
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path"
+        LogMsg "IMB-MPI1 test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path > IMB-MPI1-AllNodes-output-Attempt-${i}.txt
+        mpiStatus=$?
+    else
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path $imb_mpi1_tests"
+        LogMsg "IMB-MPI1 test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $mpi1_ppn -n $(( $mpi1_ppn * $totalVMs )) $mpi_settings $imb_mpi1Path $imb_mpi1_tests > IMB-MPI1-AllNodes-output-Attempt-${i}.txt
+        mpiStatus=$?
+    fi
+    if [ "$mpiStatus" -eq 0 ];
+    then
+        LogMsg "IMB-MPI1 test iteration $i - Succeeded."
+        sleep 1
+    else
+        LogMsg "IMB-MPI1 test iteration $i - Failed."
+        imb_mpi1_finalStatus=$(( $imb_mpi1_finalStatus + $mpiStatus ))
+        sleep 1
+    fi
+done
+
+if [ "$imb_mpi1_finalStatus" -ne 0 ]; then
+    LogMsg "ERROR: IMB-MPI1 tests returned non-zero exit code."
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_MPI1_ALLNODES"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_MPI1_ALLNODES"
+
+fi
+
+#Verify IMB-RMA tests.
+Attempts=$(seq 1 1 $imb_rma_tests_iterations)
+imb_rma_finalStatus=0
+for i in $Attempts;
+do
+    if [[ $imb_rma_tests == "all" ]];
+    then
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath"
+        LogMsg "IMB-RMA test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath > IMB-RMA-AllNodes-output-Attempt-${i}.txt
+        rmaStatus=$?
+    else
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath $imb_rma_tests"
+        LogMsg "IMB-RMA test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $rma_ppn -n $(( $rma_ppn * $totalVMs )) $mpi_settings $imb_rmaPath $imb_rma_tests > IMB-RMA-AllNodes-output-Attempt-${i}.txt
+        rmaStatus=$?
+    fi
+    if [ "$rmaStatus" -eq 0 ];
+    then
+        LogMsg "IMB-RMA test iteration $i - Succeeded."
+        sleep 1
+    else
+        LogMsg "IMB-RMA test iteration $i - Failed."
+        imb_rma_finalStatus=$(( $imb_rma_finalStatus + $rmaStatus ))
+        sleep 1
+    fi
+done
+
+if [ "$imb_rma_finalStatus" -ne 0 ]; then
+    LogMsg "ERROR: IMB-RMA tests returned non-zero exit code. Aborting further tests."
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_RMA_ALLNODES"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_RMA_ALLNODES"
+fi
+
+#Verify IMB-NBC tests.
+Attempts=$(seq 1 1 $imb_nbc_tests_iterations)
+imb_nbc_finalStatus=0
+for i in $Attempts;
+do
+    if [[ $imb_nbc_tests == "all" ]];
+    then
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath"
+        LogMsg "IMB-NBC test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath > IMB-NBC-AllNodes-output-Attempt-${i}.txt
+        nbcStatus=$?
+    else
+        LogMsg "$mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath $imb_nbc_tests"
+        LogMsg "IMB-NBC test iteration $i - Running."
+        $mpirunPath -hosts $master,$slaves -ppn $nbc_ppn -n $(( $nbc_ppn * $totalVMs )) $mpi_settings $imb_nbcPath $imb_nbc_tests > IMB-NBC-AllNodes-output-Attempt-${i}.txt
+        nbcStatus=$?
+    fi
+    if [ "$nbcStatus" -eq 0 ];
+    then
+        LogMsg "IMB-NBC test iteration $i - Succeeded."
+        sleep 1
+    else
+        LogMsg "IMB-NBC test iteration $i - Failed."
+        imb_nbc_finalStatus=$(( $imb_nbc_finalStatus + $nbcStatus ))
+        sleep 1
+    fi
+done
+
+if [ "$imb_nbc_finalStatus" -ne 0 ]; then
+    LogMsg "ERROR: IMB-NBC tests returned non-zero exit code. Aborting further tests."
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED_NBC_ALLNODES"
+    exit 0
+else
+    LogMsg "INFINIBAND_VERIFICATION_SUCCESS_NBC_ALLNODES"
+fi
+
+finalStatus=$(( $finaleth1Status + $finalMpiIntranodeStatus + $finalMpiInternodeStatus + $imb_mpi1_finalStatus + $imb_rma_finalStatus + $imb_nbc_finalStatus )) # aggregate eth1 status, not the last VM's
+if [ "$finalStatus" -ne 0 ];
+then
+    LogMsg "eth1Status: $finaleth1Status, finalMpiIntranodeStatus:$finalMpiIntranodeStatus, finalMpiInternodeStatus:$finalMpiInternodeStatus, imb_mpi1_finalStatus:$imb_mpi1_finalStatus, imb_rma_finalStatus:$imb_rma_finalStatus, imb_nbc_finalStatus:$imb_nbc_finalStatus"
+    UpdateTestState "$ICA_TESTFAILED"
+    LogMsg "INFINIBAND_VERIFICATION_FAILED"
+else
+    UpdateTestState "$ICA_TESTCOMPLETED"
+    LogMsg "INFINIBAND_VERIFIED_SUCCESSFULLY"
+fi
\ No newline at end of file