Run secnetperf in Separate Process to Wait on (with Timeout) (#4062)

2024-01-18 15:32:54 -05:00 · 2024-01-18 15:32:54 -05:00 · f3ac189445
--- a/.github/workflows/netperf.yml
+++ b/.github/workflows/netperf.yml
@ -5,9 +5,35 @@ on:
  push:
    branches:
    - main
+    paths:
+    - .github/workflows/netperf.yml
+    - .gitmodules
+    - scripts/secnetperf.ps1
+    - scripts/secnetperf-helpers.psm1
+    - scripts/prepare-machine.ps1
+    - src/bin/*
+    - src/core/*
+    - src/platform/*
+    - src/perf/*
+    - submodules/openssl/*
+    - submodules/openssl3/*
+    - submodules/xdp-for-windows/*
  pull_request:
    branches:
    - main
+    paths:
+    - .github/workflows/netperf.yml
+    - .gitmodules
+    - scripts/secnetperf.ps1
+    - scripts/secnetperf-helpers.psm1
+    - scripts/prepare-machine.ps1
+    - src/bin/*
+    - src/core/*
+    - src/platform/*
+    - src/perf/*
+    - submodules/openssl/*
+    - submodules/openssl3/*
+    - submodules/xdp-for-windows/*

 concurrency:
  # Cancel any workflow currently in progress for the same PR.
--- a/scripts/secnetperf-helpers.psm1
+++ b/scripts/secnetperf-helpers.psm1
@ -49,7 +49,6 @@ function Collect-LocalDumps {
            Remove-Item -Path "./artifacts/crashdumps/*" -Force
            return $true
        }
-    } else {
    }
    return $false
 }
@ -73,6 +72,7 @@ function Collect-RemoteDumps {
            return $true
        }
    } else {
+        # TODO - Collect Linux dumps.
    }
    return $false
 }
@ -82,23 +82,23 @@ function Collect-RemoteDumps {
 function Start-RemoteServer {
    param ($Session, $Command)
    # Start the server on the remote in an async job.
-    $Job = Invoke-Command -Session $Session -ScriptBlock { iex $Using:Command } -AsJob
+    $job = Invoke-Command -Session $Session -ScriptBlock { iex $Using:Command } -AsJob
    # Poll the job for 10 seconds to see if it started.
    $StopWatch =  [system.diagnostics.stopwatch]::StartNew()
    while ($StopWatch.ElapsedMilliseconds -lt 30000) {
-        $CurrentResults = Receive-Job -Job $Job -Keep -ErrorAction Continue
+        $CurrentResults = Receive-Job -Job $job -Keep -ErrorAction Continue
        if (![string]::IsNullOrWhiteSpace($CurrentResults)) {
            $DidMatch = $CurrentResults -match "Started!" # Look for the special string to indicate success.
            if ($DidMatch) {
-                return $Job
+                return $job
            }
        }
        Start-Sleep -Seconds 0.1 | Out-Null
    }

    # On failure, dump the output of the job.
-    Stop-Job -Job $Job
-    $RemoteResult = Receive-Job -Job $Job -ErrorAction Stop
+    Stop-Job -Job $job
+    $RemoteResult = Receive-Job -Job $job -ErrorAction Stop
    $RemoteResult = $RemoteResult -join "`n"
    Write-Host $RemoteResult.ToString()
    throw "Server failed to start!"
@ -131,19 +131,62 @@ function Stop-RemoteServer {
    throw "Server failed to stop!"
 }

-class TestResult {
-    [String]$Metric;
-    [System.Object[]]$Values;
-    [Boolean]$HasFailures;
+# Creates a new local process to asynchronously run the test.
+function Start-LocalTest {
+    param ($FullPath, $FullArgs, $OutputDir)
+    $pinfo = New-Object System.Diagnostics.ProcessStartInfo
+    if ($IsWindows) {
+        $pinfo.FileName = $FullPath
+        $pinfo.Arguments = $FullArgs
+    } else {
+        $pinfo.FileName = "bash"
+        $pinfo.Arguments = "-c `"ulimit -c unlimited && LSAN_OPTIONS=report_objects=1 ASAN_OPTIONS=disable_coredump=0:abort_on_error=1 UBSAN_OPTIONS=halt_on_error=1:print_stacktrace=1 $FullPath $FullArgs && echo ''`""
+        $pinfo.WorkingDirectory = $OutputDir
+    }
+    $pinfo.RedirectStandardOutput = $true
+    $pinfo.RedirectStandardError = $true
+    $pinfo.UseShellExecute = $false
+    $p = New-Object System.Diagnostics.Process
+    $p.StartInfo = $pinfo
+    $p.Start() | Out-Null
+    $p
+}

-    TestResult (
-        [String]$Metric,
-        [System.Object[]]$Values,
-        [Boolean]$HasFailures
-    ) {
-        $this.Metric = $Metric;
-        $this.Values = $Values;
-        $this.HasFailures = $HasFailures;
+# Waits for a local test process to complete, and then returns the console output.
+function Wait-LocalTest {
+    param ($Process, $TimeoutMs)
+    $StdOut = $Process.StandardOutput.ReadToEndAsync()
+    $StdError = $Process.StandardError.ReadToEndAsync()
+    if (!$Process.WaitForExit($TimeoutMs)) {
+        $Process.Kill() # TODO - Use procdump or livedump to get a dump first!
+        throw "secnetperf: Client timed out!"
+    }
+    if ($Process.ExitCode -ne 0) {
+        throw "secnetperf: Nonzero exit code: $($Process.ExitCode)"
+    }
+    [System.Threading.Tasks.Task]::WaitAll(@($StdOut, $StdError))
+    $consoleTxt = $StdOut.Result.Trim()
+    if ($null -eq $consoleTxt -or $consoleTxt.Length -eq 0) {
+        throw "secnetperf: No console output (possibly crashed)!"
+    }
+    if ($consoleTxt.Contains("Error")) {
+        throw "secnetperf: $($consoleTxt.Substring(7))" # Skip over the 'Error: ' prefix
+    }
+    return $consoleTxt
+}
+
+# Parses the console output of secnetperf to extract the metric value.
+function Get-TestOutput {
+    param ($Output, $Metric)
+    if ($Metric -eq "latency") {
+        $latency_percentiles = '(?<=\d{1,3}(?:\.\d{1,2})?th: )\d+'
+        return [regex]::Matches($Output, $latency_percentiles) | ForEach-Object {$_.Value}
+    } elseif ($Metric -eq "hps") {
+        $rawOutput -match '(\d+) HPS' | Out-Null
+        return $matches[1]
+    } else { # throughput
+        $Output -match '@ (\d+) kbps' | Out-Null
+        return $matches[1]
    }
 }

@ -166,9 +209,15 @@ function Invoke-Secnetperf {

    for ($tcp = 0; $tcp -lt 2; $tcp++) {

+    # Set up all the parameters and paths for running the test.
+    Write-Host "> secnetperf $ExeArgs -tcp:$tcp"
    $execMode = $ExeArgs.Substring(0, $ExeArgs.IndexOf(' ')) # First arg is the exec mode
-    $command = "./$SecNetPerfPath -target:netperf-peer $ExeArgs -tcp:$tcp -trimout -watchdog:45000"
-    Write-Host "> $command"
+    $fullPath = Join-Path (Split-Path $PSScriptRoot -Parent) $SecNetPerfPath
+    $fullArgs = "-target:netperf-peer $ExeArgs -tcp:$tcp -trimout -watchdog:45000"
+    $artifactName = $tcp -eq 0 ? "$metric-quic" : "$metric-tcp"
+    New-Item -ItemType Directory "artifacts/logs/$artifactName" -ErrorAction Ignore | Out-Null
+    $localDumpDir = Join-Path (Split-Path $PSScriptRoot -Parent) "artifacts/logs/$artifactName/clientdumps"
+    New-Item -ItemType Directory $localDumpDir -ErrorAction Ignore | Out-Null

    if ($LogProfile -ne "" -and $LogProfile -ne "NULL") {
        Invoke-Command -Session $Session -ScriptBlock {
@ -182,31 +231,17 @@ function Invoke-Secnetperf {
    try {

    # Start the server running.
-    $Job = Start-RemoteServer $Session "$RemoteDir/$SecNetPerfPath $execMode"
+    $job = Start-RemoteServer $Session "$RemoteDir/$SecNetPerfPath $execMode"

    # Run the test multiple times, failing (for now) only if all tries fail.
    # TODO: Once all failures have been fixed, consider all errors fatal.
    $successCount = 0
    for ($try = 0; $try -lt 3; $try++) {
        try {
-            $rawOutput = Invoke-Expression $command
-            if ($null -eq $rawOutput) {
-                throw "secnetperf: No console output (possibly crashed)!"
-            }
-            if ($rawOutput.Contains("Error")) {
-                throw "secnetperf: $($rawOutput.Substring(7))" # Skip over the 'Error: ' prefix
-            }
+            $process = Start-LocalTest $fullPath $fullArgs $localDumpDir
+            $rawOutput = Wait-LocalTest $process 60000 # 1 minute timeout
            Write-Host $rawOutput
-            if ($metric -eq "latency") {
-                $latency_percentiles = '(?<=\d{1,3}(?:\.\d{1,2})?th: )\d+'
-                $values[$tcp] += [regex]::Matches($rawOutput, $latency_percentiles) | ForEach-Object {$_.Value}
-            } elseif ($metric -eq "hps") {
-                $rawOutput -match '(\d+) HPS'
-                $values[$tcp] += $matches[1]
-            } else { # throughput
-                $rawOutput -match '@ (\d+) kbps'
-                $values[$tcp] += $matches[1]
-            }
+            $values[$tcp] += Get-TestOutput $rawOutput $metric
            $successCount++
        } catch {
            Write-GHError $_
@ -225,35 +260,37 @@ function Invoke-Secnetperf {
        $_ | Format-List *
        $hasFailures = $true
    } finally {
-        $ArtifactName = $tcp -eq 0 ? "$metric-quic" : "$metric-tcp"
-
        # Stop the server.
-        try { Stop-RemoteServer $Job $RemoteName | Out-Null } catch { } # Ignore failures for now
+        try { Stop-RemoteServer $job $RemoteName | Out-Null } catch { } # Ignore failures for now

        # Stop logging and copy the logs to the artifacts folder.
        if ($LogProfile -ne "" -and $LogProfile -ne "NULL") {
-            try { .\scripts\log.ps1 -Stop -OutputPath "./artifacts/logs/$ArtifactName/client" -RawLogOnly }
+            try { .\scripts\log.ps1 -Stop -OutputPath "./artifacts/logs/$artifactName/client" -RawLogOnly }
            catch { Write-Host "Failed to stop logging on client!" }
            Invoke-Command -Session $Session -ScriptBlock {
                try {
-                    & "$Using:RemoteDir/scripts/log.ps1" -Stop -OutputPath "$Using:RemoteDir/artifacts/logs/$Using:ArtifactName/server" -RawLogOnly
-                    dir "$Using:RemoteDir/artifacts/logs/$Using:ArtifactName"
+                    & "$Using:RemoteDir/scripts/log.ps1" -Stop -OutputPath "$Using:RemoteDir/artifacts/logs/$Using:artifactName/server" -RawLogOnly
+                    dir "$Using:RemoteDir/artifacts/logs/$Using:artifactName"
                } catch { Write-Host "Failed to stop logging on server!" }
            }
-            try { Copy-Item -FromSession $Session "$RemoteDir/artifacts/logs/$ArtifactName/*" "./artifacts/logs/$ArtifactName/" }
+            try { Copy-Item -FromSession $Session "$RemoteDir/artifacts/logs/$artifactName/*" "./artifacts/logs/$artifactName/" }
            catch { Write-Host "Failed to copy server logs!" }
        }

        # Grab any crash dumps that were generated.
-        if (Collect-LocalDumps "./artifacts/logs/$ArtifactName/clientdumps") {
+        if (Collect-LocalDumps $localDumpDir) {
            Write-Host "Dump file(s) generated locally"
            #$hasFailures = $true
        }
-        if (Collect-RemoteDumps $Session "./artifacts/logs/$ArtifactName/serverdumps") {
+        if (Collect-RemoteDumps $Session "./artifacts/logs/$artifactName/serverdumps") {
            Write-Host "Dump file(s) generated on peer"
            #$hasFailures = $true
        }
    }}

-    return [TestResult]::new($metric, $values, $hasFailures)
+    return [pscustomobject]@{
+        Metric = $metric
+        Values = $values
+        HasFailures = $hasFailures
+    }
 }
--- a/scripts/secnetperf.ps1
+++ b/scripts/secnetperf.ps1
@ -131,8 +131,22 @@ if (!$isWindows) {
        $env:LD_LIBRARY_PATH = "${env:LD_LIBRARY_PATH}:$Using:RemoteDir/$Using:SecNetPerfDir"
        chmod +x "$Using:RemoteDir/$Using:SecNetPerfPath"
    }
-    $env:LD_LIBRARY_PATH = "${env:LD_LIBRARY_PATH}:./$SecNetPerfDir"
+    $fullPath = Join-Path (Split-Path $PSScriptRoot -Parent) $SecNetPerfDir
+    $env:LD_LIBRARY_PATH = "${env:LD_LIBRARY_PATH}:$fullPath"
    chmod +x "./$SecNetPerfPath"
+
+    if ((Get-Content "/etc/security/limits.conf") -notcontains "root soft core unlimited") {
+        # Enable core dumps for the system.
+        Write-Host "Setting core dump size limit..."
+        sudo sh -c "echo 'root soft core unlimited' >> /etc/security/limits.conf"
+        sudo sh -c "echo 'root hard core unlimited' >> /etc/security/limits.conf"
+        sudo sh -c "echo '* soft core unlimited' >> /etc/security/limits.conf"
+        sudo sh -c "echo '* hard core unlimited' >> /etc/security/limits.conf"
+    }
+
+    # Set the core dump pattern.
+    Write-Host "Setting core dump pattern..."
+    sudo sh -c "echo -n '%e.%p.%t.core' > /proc/sys/kernel/core_pattern"
 }

 # Run all the test cases.
@ -166,6 +180,18 @@ VALUES ($TestId, '$MsQuicCommit', $env, $env, $item, NULL);

 Write-Host "Tests complete!"

+if (Get-ChildItem -Path ./artifacts/logs -File -Recurse) {
+    # Logs or dumps were generated. Copy the necessary symbols/files to the same
+    # direcotry be able to open them.
+    Write-Host "Copying debugging files to logs directory..."
+    if ($isWindows) {
+        Copy-Item "$SecNetPerfDir/*.pdb" ./artifacts/logs
+    } else {
+        Copy-Item "$SecNetPerfDir/libmsquic.so" ./artifacts/logs
+        Copy-Item "$SecNetPerfDir/secnetperf" ./artifacts/logs
+    }
+}
+
 } catch {
    Write-GHError "Exception while running tests!"
    Write-GHError $_