From d9c03c6266b0db741e6dec4b4bd114c6e5f1cf32 Mon Sep 17 00:00:00 2001 From: lizzha Date: Wed, 25 Mar 2015 12:33:20 +0800 Subject: [PATCH] CoreOS image preparation + collect logs when VM provision failed in image preparation --- AzureAutomationManager.ps1 | 8 +- TestLibs/RDFELibs.psm1 | 97 ++++++++++++++++++++++- remote-scripts/GetLogFromDataDisk.py | 62 +++++++++++++++ remote-scripts/SETUP-INSTALL-PACKAGES.ps1 | 29 +++++-- remote-scripts/SETUP-INSTALL-PACKAGES.py | 71 +++++++++++++++-- 5 files changed, 250 insertions(+), 17 deletions(-) create mode 100644 remote-scripts/GetLogFromDataDisk.py diff --git a/AzureAutomationManager.ps1 b/AzureAutomationManager.ps1 index b5b0433..0fd89c3 100644 --- a/AzureAutomationManager.ps1 +++ b/AzureAutomationManager.ps1 @@ -9,7 +9,7 @@ # - Invokes azure test suite ## Author : v-ampaw@microsoft.com ############################################################################################### -param ([string] $xmlConfigFile, [switch] $eMail, [string] $logFilename="azure_ica.log", [switch] $runtests, [switch]$onCloud, [switch] $vhdprep, [switch]$upload, [switch] $help, [string] $Distro, [string] $cycleName, [string] $TestPriority, [string]$osImage, [switch]$EconomyMode, [switch]$keepReproInact) +param ([string] $xmlConfigFile, [switch] $eMail, [string] $logFilename="azure_ica.log", [switch] $runtests, [switch]$onCloud, [switch] $vhdprep, [switch]$upload, [switch] $help, [string] $Distro, [string] $cycleName, [string] $TestPriority, [string]$osImage, [switch]$EconomyMode, [switch]$keepReproInact, [string] $DebugDistro) #Import-Module .\TestLibs\RDFELibs.psm1 -Force Import-Module .\TestLibs\AzureWinUtils.psm1 -Force @@ -93,7 +93,6 @@ try Set-Variable -Name EconomyMode -Value $false -Scope Global Set-Variable -Name keepReproInact -Value $false -Scope Global } - $AzureSetup = $xmlConfig.config.Azure.General LogMsg ("Info : AzureAutomationManager.ps1 - LIS on Azure Automation") @@ -163,6 +162,11 @@ try #.\ica.ps1 .\XML\test.xml -runtests 
exit } + if ($DebugDistro) + { + $OsImage = $xmlConfig.config.Azure.Deployment.Data.Distro | ? { $_.name -eq $DebugDistro} | % { $_.OsImage } + Set-Variable -Name DebugOsImage -Value $OsImage -Scope Global + } $testCycle = GetCurrentCycleData -xmlConfig $xmlConfig -cycleName $cycleName #Invoke Azure Test Suite diff --git a/TestLibs/RDFELibs.psm1 b/TestLibs/RDFELibs.psm1 index cd31df9..1f73f36 100644 --- a/TestLibs/RDFELibs.psm1 +++ b/TestLibs/RDFELibs.psm1 @@ -779,7 +779,7 @@ Function VerifyAllDeployments($servicesToVerify) } else { - LogErr "$serviceName Failed.." + LogErr "$serviceName provision Failed.." $retValue = "False" } } @@ -998,7 +998,7 @@ Function SetDistroSpecificVariables($detectedDistro) Set-Variable -Name python_cmd -Value $python_cmd -Scope Global } -Function DeployVMs ($xmlConfig, $setupType, $Distro) +Function DeployVMs ($xmlConfig, $setupType, $Distro, $getLogsIfFailed = $false) { if( (!$EconomyMode) -or ( $EconomyMode -and ($xmlConfig.config.Azure.Deployment.$setupType.isDeployed -eq "NO"))) { @@ -1052,6 +1052,22 @@ Function DeployVMs ($xmlConfig, $setupType, $Distro) Write-Host "One or More Deployments are Failed..!" 
$retValue = $NULL } + # get the logs of the first provision-failed VM + if ($retValue -eq $NULL -and $getLogsIfFailed -and $DebugOsImage) + { + foreach ($service in $servicesToVerify) + { + $VMs = Get-AzureVM -ServiceName $service + foreach ($vm in $VMs) + { + if ($vm.InstanceStatus -ne "ReadyRole" ) + { + $out = GetLogsFromProvisionFailedVM -vmName $vm.Name -serviceName $service -xmlConfig $xmlConfig + return $NULL + } + } + } + } } catch { @@ -1074,6 +1090,81 @@ Function DeployVMs ($xmlConfig, $setupType, $Distro) return $retValue } +function GetLogsFromProvisionFailedVM ($vmName, $serviceName, $xmlConfig) +{ + try + { + LogMsg "Stopping the provision-failed VM : $vmName" + $tmp = Stop-AzureVM -ServiceName $serviceName -Name $vmName -Force + LogMsg "Stopped the VM succussfully" + + LogMsg "Capturing the provision-failed VM Image" + $ErrorImageName = "$serviceName-fail" + $tmp = Save-AzureVMImage -ServiceName $serviceName -Name $vmName -NewImageName $ErrorImageName -NewImageLabel $ErrorImageName + LogMsg "Successfully captured VM image : $ErrorImageName" + $vhdLink = (Get-AzureVMImage -ImageName $ErrorImageName).MediaLink + + $debugVMName = "$serviceName-debug" + $debugVMUser = $xmlConfig.config.Azure.Deployment.Data.UserName + $debugVMPasswd = $xmlConfig.config.Azure.Deployment.Data.Password + + $debugSshPath = "/home/$debugVMUser/.ssh/authorized_keys" + + LogMsg "Creating debug VM $debugVMName in service $serviceName" + $newVmConfigCmd = "New-AzureVMConfig -Name $debugVMName -InstanceSize `"Basic_A1`" -ImageName $DebugOsImage | Add-AzureProvisioningConfig -Linux -LinuxUser $debugVMUser -Password $debugVMPasswd -SSHPublicKeys (New-AzureSSHKey -PublicKey -Fingerprint `"690076D4C41C1DE677CD464EA63B44AE94C2E621`" -Path $debugSshPath) | Set-AzureEndpoint -Name `"SSH`" -LocalPort 22 -PublicPort 22 -Protocol `"TCP`"" + $newVmCmd = "New-AzureVM -ServiceName $serviceName -VMs ($newVmConfigCmd)" + + $out = RunAzureCmd -AzureCmdlet $newVmCmd + + $isVerified = 
VerifyAllDeployments -servicesToVerify @($serviceName) + if ($isVerified -eq "True") + { + $isConnected = isAllSSHPortsEnabled -DeployedServices $serviceName + if ($isConnected -ne "True") + { + return + } + } + + LogMsg "Removing image $ErrorImageName, keep the VHD $vhdLink" + Remove-AzureVMImage -ImageName $ErrorImageName + + LogMsg "Attaching VHD $vhdLink to VM $debugVMName" + $vm = Get-AzureVM -ServiceName $serviceName -Name $debugVMName + $vm | Add-AzureDataDisk -ImportFrom -MediaLocation $vhdLink -DiskLabel "main" -LUN 0 | Update-AzureVM + + $ip = (Get-AzureEndpoint -VM $vm)[0].Vip + + $runFile = "remote-scripts\GetLogFromDataDisk.py" + $out = RemoteCopy -uploadTo $ip -port 22 -files "$runFile" -username $debugVMUser -password $debugVMPasswd -upload + + $out = RunLinuxCmd -ip $ip -port 22 -username $debugVMUser -password $debugVMPasswd -command "chmod +x *" -runAsSudo + $out = RunLinuxCmd -ip $ip -port 22 -username $debugVMUser -password $debugVMPasswd -command "./GetLogFromDataDisk.py -u $debugVMUser" -runAsSudo + + $dir = "$LogDir\$vmName" + if (-not (Test-Path $dir)) + { + mkdir $dir + } + LogMsg "Downloading logs from the VHD" + $out = RemoteCopy -download -downloadFrom $ip -port 22 -files "/home/$debugVMUser/waagent.log" -downloadTo $dir -username $debugVMUser -password $debugVMPasswd + $out = RemoteCopy -download -downloadFrom $ip -port 22 -files "/home/$debugVMUser/messages.log" -downloadTo $dir -username $debugVMUser -password $debugVMPasswd + $out = RemoteCopy -download -downloadFrom $ip -port 22 -files "/home/$debugVMUser/dmesg.log" -downloadTo $dir -username $debugVMUser -password $debugVMPasswd + + LogMsg "Stopping VM $debugVMName" + $tmp = Stop-AzureVM -ServiceName $serviceName -Name $debugVMName -Force + + # Remove the Cloud Service + LogMsg "Executing: Remove-AzureService -ServiceName $serviceName -Force -DeleteAll" + Remove-AzureService -ServiceName $serviceName -Force -DeleteAll + } + catch + { + $ErrorMessage = $_.Exception.Message + LogMsg 
"EXCEPTION in GetLogsFromProvisionFailedVM() : $ErrorMessage" + } +} + Function Test-TCP($testIP, $testport) { $socket = new-object Net.Sockets.TcpClient @@ -4063,7 +4154,7 @@ Function StartAllDeployments($DeployedServices) $retryCount = 3 While(($retryCount -gt 0) -and !($isRestarted)) { - LogMsg "Staring : $($VM.Name)" + LogMsg "Starting : $($VM.Name)" $out = Start-AzureVM -ServiceName $hsName -Name $VM.Name $isRestarted = $? if ($isRestarted)
diff --git a/remote-scripts/GetLogFromDataDisk.py b/remote-scripts/GetLogFromDataDisk.py
new file mode 100644
index 0000000..499d0f1
--- /dev/null
+++ b/remote-scripts/GetLogFromDataDisk.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+"""Copy logs from the disk at /dev/sdc into the given user's home directory."""
+
+import sys
+import os
+import subprocess
+import shutil
+import re
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-u', '--username', help='please input the user name ', required=True, type = str)
+args = parser.parse_args()
+username = args.username
+
+def Run(cmd):
+    """Run cmd through the shell and return whatever it wrote to stdout.
+
+    The exit status is not checked; the output is returned either way.
+    """
+    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+    # communicate() drains stdout while waiting for the child.  Calling
+    # wait() before read() deadlocks once the child fills the pipe buffer.
+    return proc.communicate()[0]
+
+# Only partitions of /dev/sdc are considered.  The device name is taken as
+# the first 9 characters (the length of "/dev/sdc1") of the last match.
+output = Run("fdisk -l | grep /dev/sdc")
+output = output.strip()
+outputlist = re.split("\n", output)
+diskname = outputlist[-1][:9]
+print(diskname)
+
+# os.makedirs() raises OSError when the directory already exists, so only
+# create the mount point when an earlier run has not left it behind.
+if not os.path.isdir("/mnt2"):
+    os.makedirs("/mnt2")
+
+subprocess.call(["mount", diskname, "/mnt2"])
+
+syslog_path = "/mnt2/var/log/syslog"
+message_path = "/mnt2/var/log/messages"
+waagent_path = "/mnt2/var/log/waagent.log"
+dmesg_path = "/mnt2/var/log/dmesg"
+
+message_log = "/home/" + username + "/messages.log"
+waagent_log = "/home/" + username + "/waagent.log"
+dmesg_log = "/home/" + username + "/dmesg.log"
+
+# Create each target first so it exists even if its source log is absent.
+Run("touch " + message_log)
+Run("touch " + waagent_log)
+Run("touch " + dmesg_log)
+
+if (os.path.exists(syslog_path)):
+    shutil.copyfile(syslog_path, message_log)
+elif (os.path.exists(message_path)):
+    shutil.copyfile(message_path, message_log)
+if (os.path.exists(waagent_path)):
+    shutil.copyfile(waagent_path, waagent_log)
+if (os.path.exists(dmesg_path)):
+    shutil.copyfile(dmesg_path, dmesg_log)
diff --git a/remote-scripts/SETUP-INSTALL-PACKAGES.ps1 b/remote-scripts/SETUP-INSTALL-PACKAGES.ps1 index 99c6378..e4517f0 100644 --- a/remote-scripts/SETUP-INSTALL-PACKAGES.ps1 +++ b/remote-scripts/SETUP-INSTALL-PACKAGES.ps1 @@ -5,10 +5,9 @@ $testResult = "" $SetupStatus= "" $resultArr = @() -$isDeployed = DeployVMS -setupType $currentTestData.setupType -Distro $Distro -xmlConfig $xmlConfig +$isDeployed = DeployVMS -setupType $currentTestData.setupType -Distro $Distro -xmlConfig $xmlConfig -getLogsIfFailed $true if ($isDeployed) { - try { $testServiceData = Get-AzureService -ServiceName $isDeployed @@ -31,7 +30,17 @@ if ($isDeployed) LogMsg "Executing : $($currentTestData.testScript)" try{ - $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "python ./$($currentTestData.testScript)" -runAsSudo + $DistroName = DetectLinuxDistro -VIP $hs1VIP -SSHport $hs1vm1sshport -testVMUser $user -testVMPassword $password + + if ($DistroName -eq "COREOS") + { + RemoteCopy -uploadTo $hs1VIP -port $hs1vm1sshport -files "Tools\CoreosPreparationTools.zip" -username $user -password $password -upload + $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "/usr/share/oem/python/bin/python ./$($currentTestData.testScript)" -runAsSudo + } + else{ + $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "python ./$($currentTestData.testScript)" -runAsSudo -runMaxAllowedTime 1200 + } + $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "ls /home/$user/SetupStatus.txt 2>&1" -runAsSudo if($output -imatch "/home/$user/SetupStatus.txt") @@ -55,9 +64,17 @@ if ($isDeployed) if($SetupStatus -imatch 
"PACKAGE-INSTALL-CONFIG-PASS") { LogMsg "** All the required packages for the distro installed successfully **" - GetVMLogs -DeployedServices $isDeployed - #VM De-provision - $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "/usr/sbin/waagent -force -deprovision+user 2>&1" -runAsSudo + + #VM De-provision + if ($DistroName -eq "COREOS") + { + $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "/usr/share/oem/python/bin/python /usr/share/oem/bin/waagent -force -deprovision+user 2>&1" -runAsSudo + } + else { + GetVMLogs -DeployedServices $isDeployed + $output = RunLinuxCmd -username $user -password $password -ip $hs1VIP -port $hs1vm1sshport -command "/usr/sbin/waagent -force -deprovision+user 2>&1" -runAsSudo + } + if($output -match "home directory will be deleted") { LogMsg "** VM De-provisioned Successfully **" diff --git a/remote-scripts/SETUP-INSTALL-PACKAGES.py b/remote-scripts/SETUP-INSTALL-PACKAGES.py index b7109f6..44c85ff 100644 --- a/remote-scripts/SETUP-INSTALL-PACKAGES.py +++ b/remote-scripts/SETUP-INSTALL-PACKAGES.py @@ -64,6 +64,57 @@ def zypper_package_install(package): else: return False +def coreos_package_install(): + binpath="/usr/share/oem/bin" + pythonlibrary="/usr/share/oem/python/lib64/python2.7" + + # create /etc/hosts + ExecMultiCmdsLocalSudo(["touch /etc/hosts",\ + "echo '127.0.0.1 localhost' > /etc/hosts",\ + "echo '** modify /etc/hosts successfully **' >> PackageStatus.txt"]) + # copy tools to bin folder + Run("unzip -d CoreosPreparationTools ./CoreosPreparationTools.zip") + ExecMultiCmdsLocalSudo(["cp ./CoreosPreparationTools/killall " + binpath, \ + "cp ./CoreosPreparationTools/iperf " + binpath,\ + "cp ./CoreosPreparationTools/iozone " + binpath,\ + "cp ./CoreosPreparationTools/dos2unix " + binpath,\ + "cp ./CoreosPreparationTools/at " + binpath,\ + "chmod 755 "+ binpath + "/*",\ + "echo '** copy tools successfully **' >> 
PackageStatus.txt"]) + # copy python library to python library folder + Run("tar zxvf ./CoreosPreparationTools/pycrypto.tar.gz -C "+ pythonlibrary) + ExecMultiCmdsLocalSudo(["tar zxvf ./CoreosPreparationTools/ecdsa-0.13.tar.gz -C ./CoreosPreparationTools",\ + "cd ./CoreosPreparationTools/ecdsa-0.13",\ + "/usr/share/oem/python/bin/python setup.py install",\ + "cd ../.."]) + ExecMultiCmdsLocalSudo(["tar zxvf ./CoreosPreparationTools/paramiko-1.15.1.tar.gz -C ./CoreosPreparationTools",\ + "cd ./CoreosPreparationTools/paramiko-1.15.1",\ + "/usr/share/oem/python/bin/python setup.py install",\ + "cd ../..",\ + "tar zxvf ./CoreosPreparationTools/pexpect-3.3.tar.gz -C ./CoreosPreparationTools",\ + "cd ./CoreosPreparationTools/pexpect-3.3",\ + "/usr/share/oem/python/bin/python setup.py install",\ + "cd ../.."]) + ExecMultiCmdsLocalSudo(["tar zxvf ./CoreosPreparationTools/dnspython-1.12.0.tar.gz -C ./CoreosPreparationTools",\ + "cd ./CoreosPreparationTools/dnspython-1.12.0",\ + "/usr/share/oem/python/bin/python setup.py install",\ + "cd ../.."]) + if not os.path.exists (pythonlibrary + "/site-packages/pexpect"): + RunLog.info ("pexpect package installation failed!") + Run("echo '** pexpect package installation failed **' >> PackageStatus.txt") + return False + if not os.path.exists (pythonlibrary + "/site-packages/paramiko"): + RunLog.info ("paramiko packages installation failed!") + Run("echo '** paramiko packages installed failed **' >> PackageStatus.txt") + return False + if not os.path.exists (pythonlibrary + "/site-packages/dns"): + RunLog.info ("dnspython packages installation failed!") + Run("echo '** dnspython packages installed failed **' >> PackageStatus.txt") + return False + RunLog.info ("pexpect, paramiko and dnspython packages installed successfully!") + Run("echo '** pexpect, paramiko and dnspython packages installed successfully **' >> PackageStatus.txt") + return True + def install_waagent_from_github(): RunLog.info ("Installing waagent from github...") @@ 
-214,17 +265,25 @@ def RunTest(): elif node.tag == "tar_link": tar_link[node.attrib["name"]] = node.text - for package in packages_list: - if(not install_package(package)): + if not (current_distro=="coreos"): + for package in packages_list: + if(not install_package(package)): + success = False + Run("echo '"+package+"' failed to install >> PackageStatus.txt") + #break + else: + Run("echo '"+package+"' installed successfully >> PackageStatus.txt") + else: + if (not coreos_package_install()): success = False - Run("echo '"+package+"' failed to install >> PackageStatus.txt") - #break + Run("echo 'coreos packages failed to install' >> PackageStatus.txt") else: - Run("echo '"+package+"' installed successfully >> PackageStatus.txt") + Run("echo 'coreos support tools installed successfully' >> PackageStatus.txt") Run("echo '** Packages Installation Completed **' >> PackageStatus.txt") if success == True: - ConfigFilesUpdate() + if not (current_distro=="coreos"): + ConfigFilesUpdate() if success == True: RunLog.info('PACKAGE-INSTALL-CONFIG-PASS') Run("echo 'PACKAGE-INSTALL-CONFIG-PASS' >> SetupStatus.txt")