Print Statistics in netperf Throughput Tests (#4078)

2024-01-24 15:21:09 -05:00 · 2024-01-24 15:21:09 -05:00 · f370758990
--- a/scripts/secnetperf-helpers.psm1
+++ b/scripts/secnetperf-helpers.psm1
@ -49,7 +49,9 @@ function Collect-LocalDumps {
        if ($DumpFiles) {
            mkdir $OutputDir -ErrorAction Ignore | Out-Null
            foreach ($File in $DumpFiles) {
-                Copy-Item -Path $File.FullName -Destination $OutputDir
+                $NewFileName = $File.Name -replace "secnetperf.exe", "secnetperf.exe.client"
+                $NewFilePath = Join-Path $OutputDir $NewFileName
+                Copy-Item -Path $File.FullName -Destination $NewFilePath
            }
            # Delete all the files in the crashdumps folder.
            Remove-Item -Path "./artifacts/crashdumps/*" -Force
@ -69,7 +71,9 @@ function Collect-RemoteDumps {
        if ($DumpFiles) {
            mkdir $OutputDir -ErrorAction Ignore | Out-Null
            foreach ($File in $DumpFiles) {
-                Copy-Item -FromSession $Session -Path $File.FullName -Destination $OutputDir
+                $NewFileName = $File.Name -replace "secnetperf.exe", "secnetperf.exe.server"
+                $NewFilePath = Join-Path $OutputDir $NewFileName
+                Copy-Item -FromSession $Session -Path $File.FullName -Destination $NewFilePath
            }
            # Delete all the files in the crashdumps folder.
            Invoke-Command -Session $Session -ScriptBlock {
@ -92,17 +96,17 @@ function Collect-LocalDump {
        Write-Host "procdump64.exe not found!"
        return;
    }
-    $dumpPath = Join-Path $OutputDir "secnetperf.exe.$($Process.Id).dmp"
+    $dumpPath = Join-Path $OutputDir "secnetperf.exe.client.$($Process.Id).dmp"
    & $procDump -accepteula -ma $($Process.Id) $dumpPath
 }

 # Use livekd64.exe to collect a dump of the kernel.
 function Collect-LiveKD {
-    param ($OutputDir)
+    param ($OutputDir, $Prefix)
    if (!$isWindows) { return } # Not supported on Windows
    $liveKD = Repo-Path "artifacts/corenet-ci-main/vm-setup/livekd64.exe"
    $KD = Repo-Path "artifacts/corenet-ci-main/vm-setup/kd.exe"
-    $dumpPath = Join-Path $OutputDir "kernel.$(New-Guid).dmp"
+    $dumpPath = Join-Path $OutputDir "kernel.$Prefix.$(New-Guid).dmp"
    & $liveKD -o $dumpPath -k $KD -ml -accepteula
 }

@ -301,7 +305,7 @@ function Wait-LocalTest {
    $StdOut = $Process.StandardOutput.ReadToEndAsync()
    $StdError = $Process.StandardError.ReadToEndAsync()
    if (!$Process.WaitForExit($TimeoutMs)) {
-        if ($testKernel) { Collect-LiveKD $OutputDir }
+        if ($testKernel) { Collect-LiveKD $OutputDir "client" }
        Collect-LocalDump $Process $OutputDir
        try { $Process.Kill() } catch { }
        try {
@ -351,15 +355,13 @@ function Get-TestOutput {

 # Invokes secnetperf with the given arguments for both TCP and QUIC.
 function Invoke-Secnetperf {
-    param ($Session, $RemoteName, $RemoteDir, $SecNetPerfPath, $LogProfile, $ExeArgs, $io)
-    # TODO: This logic is pretty fragile. Needs improvement.
-    $metric = "throughput-download"
+    param ($Session, $RemoteName, $RemoteDir, $SecNetPerfPath, $LogProfile, $TestId, $ExeArgs, $io)
+
+    $metric = "throughput"
    if ($exeArgs.Contains("plat:1")) {
        $metric = "latency"
    } elseif ($exeArgs.Contains("prate:1")) {
        $metric = "hps"
-    } elseif ($exeArgs.Contains("-up")) {
-        $metric = "throughput-upload"
    }

    $values = @(@(), @())
@ -380,10 +382,17 @@ function Invoke-Secnetperf {
        $serverArgs += " -driverNamePriv:secnetperfdrvpriv"
        $clientArgs += " -driverNamePriv:secnetperfdrvpriv"
    }
-    $artifactName = $tcp -eq 0 ? "$metric-quic" : "$metric-tcp"
+    if ($metric -eq "throughput") {
+        $serverArgs += " -pconn:1 -pstream:1"
+        $clientArgs += " -pconn:1 -pstream:1"
+    } elseif ($metric -eq "latency") {
+        $serverArgs += " -pconn:1"
+        $clientArgs += " -pconn:1"
+    }
+    $artifactName = $tcp -eq 0 ? "$TestId-quic" : "$TestId-tcp"
    New-Item -ItemType Directory "artifacts/logs/$artifactName" -ErrorAction Ignore | Out-Null
-    $localDumpDir = Repo-Path "artifacts/logs/$artifactName/clientdumps"
-    New-Item -ItemType Directory $localDumpDir -ErrorAction Ignore | Out-Null
+    $artifactDir = Repo-Path "artifacts/logs/$artifactName"
+    New-Item -ItemType Directory $artifactDir -ErrorAction Ignore | Out-Null

    # Start logging on both sides, if configured.
    if ($LogProfile -ne "" -and $LogProfile -ne "NULL") {
@ -395,7 +404,7 @@ function Invoke-Secnetperf {
        .\scripts\log.ps1 -Start -Profile $LogProfile
    }

-    Write-Host "> secnetperf $clientArgs"
+    Write-Host "::group::> secnetperf $clientArgs"

    try {

@ -405,52 +414,57 @@ function Invoke-Secnetperf {
    # Run the test multiple times, failing (for now) only if all tries fail.
    # TODO: Once all failures have been fixed, consider all errors fatal.
    $successCount = 0
+    $testFailures = $false
    for ($try = 0; $try -lt 3; $try++) {
+        Write-Host "==============================`nRUN $($try+1):"
        try {
-            $process = Start-LocalTest $clientPath $clientArgs $localDumpDir
-            $rawOutput = Wait-LocalTest $process $localDumpDir ($io -eq "wsk") 30000
+            $process = Start-LocalTest $clientPath $clientArgs $artifactDir
+            $rawOutput = Wait-LocalTest $process $artifactDir ($io -eq "wsk") 30000
            Write-Host $rawOutput
            $values[$tcp] += Get-TestOutput $rawOutput $metric
            $successCount++
        } catch {
            Write-GHError $_
-            #$hasFailures = $true
+            #$testFailures = $true
        }
        Start-Sleep -Seconds 1 | Out-Null
    }
    if ($successCount -eq 0) {
-        $hasFailures = $true # For now, consider failure only if all failed
-        Write-GHError "secnetperf: All test tries failed!"
+        $testFailures = $true # For now, consider failure only if all failed
    }

    } catch {
        Write-GHError "Exception while running test case!"
        Write-GHError $_
        $_ | Format-List *
-        $hasFailures = $true
+        $testFailures = $true
    } finally {
        # Stop the server.
        try { Stop-RemoteServer $job $RemoteName | Out-Null } catch { } # Ignore failures for now

        # Stop any logging and copy the logs to the artifacts folder.
        if ($LogProfile -ne "" -and $LogProfile -ne "NULL") {
-            try { .\scripts\log.ps1 -Stop -OutputPath "./artifacts/logs/$artifactName/client" -RawLogOnly }
+            try { .\scripts\log.ps1 -Stop -OutputPath "$artifactDir/client" -RawLogOnly }
            catch { Write-Host "Failed to stop logging on client!" }
            Invoke-Command -Session $Session -ScriptBlock {
                try { & "$Using:RemoteDir/scripts/log.ps1" -Stop -OutputPath "$Using:RemoteDir/artifacts/logs/$Using:artifactName/server" -RawLogOnly }
                catch { Write-Host "Failed to stop logging on server!" }
            }
-            try { Copy-Item -FromSession $Session "$RemoteDir/artifacts/logs/$artifactName/*" "./artifacts/logs/$artifactName/" -Recurse }
+            try { Copy-Item -FromSession $Session "$RemoteDir/artifacts/logs/$artifactName/*" $artifactDir -Recurse }
            catch { Write-Host "Failed to copy server logs!" }
        }

        # Grab any crash dumps that were generated.
-        if (Collect-LocalDumps $localDumpDir) {
-            #$hasFailures = $true
-        }
-        if (Collect-RemoteDumps $Session "./artifacts/logs/$artifactName/serverdumps") {
+        if (Collect-LocalDumps $artifactDir) { }
+        if (Collect-RemoteDumps $Session $artifactDir) {
            Write-GHError "Dump file(s) generated by server"
-            #$hasFailures = $true
+        }
+
+        Write-Host "::endgroup::"
+        if ($testFailures) {
+            $hasFailures = $true
+            # Write outside the group to make it easier to find in the logs.
+            Write-GHError "secnetperf: Test failures encountered!"
        }
    }}

--- a/scripts/secnetperf.ps1
+++ b/scripts/secnetperf.ps1
@ -182,14 +182,14 @@ if (!$isWindows) {

    # Set the core dump pattern.
    Write-Host "Setting core dump pattern"
-    sudo sh -c "echo -n "%e.%p.%t.core" > /proc/sys/kernel/core_pattern"
+    sudo sh -c "echo -n "%e.client.%p.%t.core" > /proc/sys/kernel/core_pattern"
 }

 # Run all the test cases.
 Write-Host "Setup complete! Running all tests"
 foreach ($testId in $allTests.Keys) {
    $ExeArgs = $allTests[$testId] + " -io:$io"
-    $Output = Invoke-Secnetperf $Session $RemoteName $RemoteDir $SecNetPerfPath $LogProfile $ExeArgs $io
+    $Output = Invoke-Secnetperf $Session $RemoteName $RemoteDir $SecNetPerfPath $LogProfile $testId $ExeArgs $io
    $Test = $Output[-1]
    if ($Test.HasFailures) { $hasFailures = $true }

@ -202,17 +202,15 @@ INSERT OR IGNORE INTO Secnetperf_tests (Secnetperf_test_ID, Kernel_mode, Run_arg
    for ($tcp = 0; $tcp -lt $Test.Values.Length; $tcp++) {
        if ($Test.Values[$tcp].Length -eq 0) { continue }
        $transport = $tcp -eq 1 ? "tcp" : "quic"
-        $json["$($Test.Metric)-$transport"] = $Test.Values[$tcp]
-        if ($Test.Metric.startsWith("throughput")) {
+        $json["$testId-$transport"] = $Test.Values[$tcp]
+        if ($Test.Metric -eq "throughput") {
            foreach ($item in $Test.Values[$tcp]) {
                $SQL += @"
 `nINSERT INTO Secnetperf_test_runs (Secnetperf_test_ID, Secnetperf_commit, Client_environment_ID, Server_environment_ID, Result, Secnetperf_latency_stats_ID, io, tls)
 VALUES ("$TestId-tcp-$tcp", "$MsQuicCommit", $env, $env, $item, NULL, "$io", "$tls");
 "@
            }
-        }
-
-        if ($Test.Metric.startsWith("latency")) {
+        } elseif ($Test.Metric -eq "latency") {
            # Test.Values[...] is a flattened 1D array of the form: [ first run + RPS, second run + RPS, third run + RPS..... ], ie. if each run has 8 values + RPS, then the array has 27 elements (8*3 + 3)
            for ($offset = 0; $offset -lt $Test.Values[$tcp].Length; $offset += 9) {
                $SQL += @"
--- a/src/perf/lib/PerfClient.cpp
+++ b/src/perf/lib/PerfClient.cpp
@ -946,7 +946,7 @@ PerfClientStream::OnShutdown() {
            const auto ElapsedMicroseconds = RecvEndTime - StartTime;
            const auto Rate = (uint32_t)((TotalBytes * 1000 * 1000 * 8) / (1000 * ElapsedMicroseconds));
            WriteOutput(
-                "  Upload: %llu bytes @ %u kbps (%u.%03u ms).\n",
+                "Result: Upload %llu bytes @ %u kbps (%u.%03u ms).\n",
                (unsigned long long)TotalBytes,
                Rate,
                (uint32_t)(ElapsedMicroseconds / 1000),
@ -966,7 +966,7 @@ PerfClientStream::OnShutdown() {
            const auto ElapsedMicroseconds = RecvEndTime - StartTime;
            const auto Rate = (uint32_t)((TotalBytes * 1000 * 1000 * 8) / (1000 * ElapsedMicroseconds));
            WriteOutput(
-                "Download: %llu bytes @ %u kbps (%u.%03u ms).\n",
+                "Result: Download %llu bytes @ %u kbps (%u.%03u ms).\n",
                (unsigned long long)TotalBytes,
                Rate,
                (uint32_t)(ElapsedMicroseconds / 1000),
--- a/src/perf/lib/SecNetPerf.h
+++ b/src/perf/lib/SecNetPerf.h
@ -140,28 +140,33 @@ QuicPrintConnectionStatistics(
 {
    QUIC_STATISTICS_V2 Statistics;
    uint32_t StatsSize = sizeof(Statistics);
-    if (QUIC_SUCCEEDED(
-        ApiTable->GetParam(
-            Connection,
-            QUIC_PARAM_CONN_STATISTICS_V2,
-            &StatsSize,
-            &Statistics))) {
-        WriteOutput(
-            "[conn][%p] STATS: EcnCapable=%u RTT=%u us SendTotalPackets=%llu SendSuspectedLostPackets=%llu SendSpuriousLostPackets=%llu SendCongestionCount=%u SendEcnCongestionCount=%u RecvTotalPackets=%llu RecvReorderedPackets=%llu RecvDroppedPackets=%llu RecvDuplicatePackets=%llu RecvDecryptionFailures=%llu\n",
-            Connection,
-            Statistics.EcnCapable,
-            Statistics.Rtt,
-            (unsigned long long)Statistics.SendTotalPackets,
-            (unsigned long long)Statistics.SendSuspectedLostPackets,
-            (unsigned long long)Statistics.SendSpuriousLostPackets,
-            Statistics.SendCongestionCount,
-            Statistics.SendEcnCongestionCount,
-            (unsigned long long)Statistics.RecvTotalPackets,
-            (unsigned long long)Statistics.RecvReorderedPackets,
-            (unsigned long long)Statistics.RecvDroppedPackets,
-            (unsigned long long)Statistics.RecvDuplicatePackets,
-            (unsigned long long)Statistics.RecvDecryptionFailures);
-    }
+    ApiTable->GetParam(Connection, QUIC_PARAM_CONN_STATISTICS_V2, &StatsSize, &Statistics);
+    WriteOutput(
+        "Connection Statistics:\n"
+        "  RTT                       %u us\n"
+        "  EcnCapable                %u\n"
+        "  SendTotalPackets          %llu\n"
+        "  SendSuspectedLostPackets  %llu\n"
+        "  SendSpuriousLostPackets   %llu\n"
+        "  SendCongestionCount       %u\n"
+        "  SendEcnCongestionCount    %u\n"
+        "  RecvTotalPackets          %llu\n"
+        "  RecvReorderedPackets      %llu\n"
+        "  RecvDroppedPackets        %llu\n"
+        "  RecvDuplicatePackets      %llu\n"
+        "  RecvDecryptionFailures    %llu\n",
+        Statistics.Rtt,
+        Statistics.EcnCapable,
+        (unsigned long long)Statistics.SendTotalPackets,
+        (unsigned long long)Statistics.SendSuspectedLostPackets,
+        (unsigned long long)Statistics.SendSpuriousLostPackets,
+        Statistics.SendCongestionCount,
+        Statistics.SendEcnCongestionCount,
+        (unsigned long long)Statistics.RecvTotalPackets,
+        (unsigned long long)Statistics.RecvReorderedPackets,
+        (unsigned long long)Statistics.RecvDroppedPackets,
+        (unsigned long long)Statistics.RecvDuplicatePackets,
+        (unsigned long long)Statistics.RecvDecryptionFailures);
 }

 inline
@ -174,21 +179,22 @@ QuicPrintStreamStatistics(
    QUIC_STREAM_STATISTICS Stats = {0};
    uint32_t BufferLength = sizeof(Stats);
    ApiTable->GetParam(Stream, QUIC_PARAM_STREAM_STATISTICS, &BufferLength, &Stats);
-    WriteOutput("Flow blocked timing:\n");
-    WriteOutput("SCHEDULING:             %llu us\n",
-        (unsigned long long)Stats.ConnBlockedBySchedulingUs);
-    WriteOutput("PACING:                 %llu us\n",
-        (unsigned long long)Stats.ConnBlockedByPacingUs);
-    WriteOutput("AMPLIFICATION_PROT:     %llu us\n",
-        (unsigned long long)Stats.ConnBlockedByAmplificationProtUs);
-    WriteOutput("CONGESTION_CONTROL:     %llu us\n",
-        (unsigned long long)Stats.ConnBlockedByCongestionControlUs);
-    WriteOutput("CONN_FLOW_CONTROL:      %llu us\n",
-        (unsigned long long)Stats.ConnBlockedByFlowControlUs);
-    WriteOutput("STREAM_ID_FLOW_CONTROL: %llu us\n",
-        (unsigned long long)Stats.StreamBlockedByIdFlowControlUs);
-    WriteOutput("STREAM_FLOW_CONTROL:    %llu us\n",
-        (unsigned long long)Stats.StreamBlockedByFlowControlUs);
-    WriteOutput("APP:                    %llu us\n",
+    WriteOutput(
+        "Stream Timings (flow blocked):\n"
+        "  SCHEDULING:               %llu us\n"
+        "  PACING:                   %llu us\n"
+        "  AMPLIFICATION_PROT:       %llu us\n"
+        "  CONGESTION_CONTROL:       %llu us\n"
+        "  CONN_FLOW_CONTROL:        %llu us\n"
+        "  STREAM_ID_FLOW_CONTROL:   %llu us\n"
+        "  STREAM_FLOW_CONTROL:      %llu us\n"
+        "  APP:                      %llu us\n",
+        (unsigned long long)Stats.ConnBlockedBySchedulingUs,
+        (unsigned long long)Stats.ConnBlockedByPacingUs,
+        (unsigned long long)Stats.ConnBlockedByAmplificationProtUs,
+        (unsigned long long)Stats.ConnBlockedByCongestionControlUs,
+        (unsigned long long)Stats.ConnBlockedByFlowControlUs,
+        (unsigned long long)Stats.StreamBlockedByIdFlowControlUs,
+        (unsigned long long)Stats.StreamBlockedByFlowControlUs,
        (unsigned long long)Stats.StreamBlockedByAppUs);
 }