Implement watermark idea (reopen PR) (#4193)

* add function implementation for latency regression

* use watermark json

* baseline is BS

* improve error messages

* bored on a Friday

* move helpers to the modules file

* fix parameter bug

---------

Co-authored-by: Jack He <jackhe@microsoft.com>
This commit is contained in:
Jack He (Github) 2024-03-14 12:00:51 -07:00 коммит произвёл GitHub
Родитель 43caec6b55
Коммит cacfccae19
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
2 изменённых файлов: 85 добавлений и 39 удалений

Просмотреть файл

@ -575,3 +575,78 @@ function Invoke-Secnetperf {
HasFailures = $hasFailures
}
}
function CheckRegressionResult($values, $testid, $transport, $regressionJson, $envStr) {
    # Compares the average of $values against the stored baseline for this
    # test/transport/environment combination.
    #
    # Parameters:
    #   $values         - numeric results of the individual runs.
    #   $testid         - test identifier; combined with $transport to form the lookup key.
    #   $transport      - transport name appended to $testid ("$testid-$transport").
    #   $regressionJson - parsed regression data, indexed as <key>.<envStr>.<field>.
    #   $envStr         - environment string used as the second-level lookup key.
    #
    # Returns:
    #   The string "NULL" when there is no regression (or nothing to compare
    #   against), otherwise a human-readable description of the regression.

    # Nothing to average: bail out instead of dividing by zero.
    $samples = @($values)
    if ($samples.Count -eq 0) {
        Write-Host "No values to check for regression"
        return "NULL"
    }

    $sum = 0
    foreach ($item in $samples) {
        $sum += $item
    }
    $avg = $sum / $samples.Count

    # Use a dedicated variable for the combined key: PowerShell variable names
    # are case-insensitive, so assigning to $Testid would overwrite $testid.
    $testKey = "$testid-$transport"

    # TODO: baseline is a bad name. Use LowerThreshold / UpperThreshold instead.
    # Reading a missing property yields $null rather than throwing (unless
    # strict mode is enabled), so the absence of a baseline has to be detected
    # with an explicit null check; the try/catch only covers strict mode.
    $baseline = $null
    try {
        $baseline = $regressionJson.$testKey.$envStr.baseline
    } catch {
        $baseline = $null
    }
    if ($null -eq $baseline) {
        Write-Host "No regression baseline found"
        return "NULL"
    }

    if ($avg -lt $baseline) {
        # Watermark fields are optional; treat an entry without BestResult as
        # a plain baseline-only entry.
        $BestResult = $null
        $Noise = $null
        $BestResultCommit = $null
        try {
            $BestResult = $regressionJson.$testKey.$envStr.BestResult
            $Noise = $regressionJson.$testKey.$envStr.Noise
            $BestResultCommit = $regressionJson.$testKey.$envStr.BestResultCommit
        } catch {
            # Strict mode only: missing watermark fields fall through to the
            # baseline-only report below.
            $BestResult = $null
        }

        if ($null -ne $BestResult) {
            Write-GHError "🤮 Regression detected in $testKey for $envStr. Baseline: $baseline, New: $avg, BestResult: $BestResult, Noise: $Noise, BestResultCommit: $BestResultCommit"
            return "🤮 Baseline: $baseline, New: $avg, BestResult: $BestResult, Noise: $Noise, BestResultCommit: $BestResultCommit"
        }

        Write-Host "Not using a watermark-based regression method."
        Write-GHError "Regression detected in $testKey for $envStr. Baseline: $baseline, New: $avg"
        return "🤮 Baseline: $baseline, New: $avg"
    }
    return "NULL"
}
function CheckRegressionLat($values, $regressionJson, $testid, $transport, $envStr) {
    # Checks the averaged P0 / P50 / P99 latencies of a set of runs against
    # the stored upper bounds for this test/transport/environment.
    #
    # $values is a flattened 1D array of the form:
    #   [ first run + RPS, second run + RPS, third run + RPS..... ],
    # ie. if each run has 8 values + RPS, then the array has 27 elements (8*3 + 3)
    # We store each subarray as [P0, P50, P90, P99 ... RPS, P0, ...]
    # So just compute the average of P0, P50, P99 across the N runs, and compare that against the upper bounds.
    #
    # Parameters:
    #   $values         - flattened latency array described above.
    #   $regressionJson - parsed regression data, indexed as
    #                     <key>.<envStr>.latencyUpperBound.{P0,P50,P99}.
    #   $testid         - test identifier; combined with $transport for the lookup key.
    #   $transport      - transport name appended to $testid ("$testid-$transport").
    #   $envStr         - environment string used as the second-level lookup key.
    #
    # Returns:
    #   The string "NULL" when no regression is detected (or there is nothing
    #   to compare), otherwise a description of the regression.

    # Number of entries each run contributes to the flattened array.
    $ValuesPerRun = 9

    $samples = @($values)
    # Only complete runs are averaged; a trailing partial run is ignored so the
    # divisor is an exact run count and no index runs past the end of the array.
    $NumRuns = ($samples.Count - ($samples.Count % $ValuesPerRun)) / $ValuesPerRun
    if ($NumRuns -lt 1) {
        Write-Host "Not enough latency values to check for regression"
        return "NULL"
    }

    $P0Avg = 0
    $P50Avg = 0
    $P99Avg = 0
    for ($run = 0; $run -lt $NumRuns; $run++) {
        $offset = $run * $ValuesPerRun
        $P0Avg += $samples[$offset]
        $P50Avg += $samples[$offset + 1]
        $P99Avg += $samples[$offset + 3]
    }
    $P0Avg /= $NumRuns
    $P50Avg /= $NumRuns
    $P99Avg /= $NumRuns

    # Use a dedicated variable for the combined key: PowerShell variable names
    # are case-insensitive, so assigning to $Testid would overwrite $testid.
    $testKey = "$testid-$transport"

    # A missing property reads as $null instead of throwing (unless strict mode
    # is on), and a non-negative number compares greater than $null. Without an
    # explicit null check a missing entry would be reported as a regression.
    $P0UpperBound = $null
    $P50UpperBound = $null
    $P99UpperBound = $null
    try {
        $P0UpperBound = $regressionJson.$testKey.$envStr.latencyUpperBound.P0
        $P50UpperBound = $regressionJson.$testKey.$envStr.latencyUpperBound.P50
        $P99UpperBound = $regressionJson.$testKey.$envStr.latencyUpperBound.P99
    } catch {
        # Strict mode only: handled by the null check below.
        $P0UpperBound = $null
    }
    if ($null -eq $P0UpperBound -or $null -eq $P50UpperBound -or $null -eq $P99UpperBound) {
        Write-Host "No regression upper bounds found"
        return "NULL"
    }

    # There is a regression if ALL 3 values are more than their upper bound.
    if ($P0Avg -gt $P0UpperBound -and $P50Avg -gt $P50UpperBound -and $P99Avg -gt $P99UpperBound) {
        Write-GHError "Latency Regression detected in $testKey for $envStr."
        Write-GHError "P0: $P0Avg, P50: $P50Avg, P99: $P99Avg"
        Write-GHError "P0 upper bound: $P0UpperBound, P50 upper bound: $P50UpperBound, P99 upper bound: $P99UpperBound"
        return "🤮 (Percentile avg, Upperbound) P0: ($P0Avg, $P0UpperBound) P50: ($P50Avg, $P50UpperBound) P99: ($P99Avg, $P99UpperBound)"
    }
    return "NULL"
}

Просмотреть файл

@ -204,34 +204,6 @@ $allTests["rps-up-512-down-4000"] = "-exec:lowlat -rstream:1 -up:512 -down:4000
$hasFailures = $false
$json["run_args"] = $allTests
function CheckRegressionTput($values, $testid, $transport, $regressionJson) {
    # Averages $values and compares the result with the stored baseline for
    # "$testid-$transport" in the current environment.
    # The environment key is built from $os, $arch, $environment, $io and $tls,
    # which are not parameters and are read from the enclosing scope.
    # Returns "NULL" when no regression is detected, otherwise a description
    # of the regression.
    $envStr = "$os-$arch-$environment-$io-$tls"
    $lookupKey = "$testid-$transport"

    # Mean of all runs.
    $total = 0
    foreach ($sample in $values) {
        $total += $sample
    }
    $mean = $total / $values.Length

    try {
        $threshold = $regressionJson.$lookupKey.$envStr.baseline
    } catch {
        Write-Host "No regression baseline found"
        return "NULL"
    }

    # Guard clause: at or above the baseline means nothing to report.
    if (-not ($mean -lt $threshold)) {
        return "NULL"
    }

    Write-GHError "Regression detected in $lookupKey for $envStr. Baseline: $threshold, New: $mean"
    return ":( Baseline: $threshold, New (avg of runs): $mean"
}
function CheckRegressionLat($values, $regressionJson) {
    # Placeholder latency check: neither argument is inspected and the
    # "no regression" marker is always returned.
    # TODO: Generate and collect latency thresholds.
    $noRegression = "NULL"
    return $noRegression
}
try {
# Prepare the machines for the testing.
@ -281,8 +253,8 @@ if (!$isWindows) {
sudo sh -c "echo -n "%e.client.%p.%t.core" > /proc/sys/kernel/core_pattern"
}
Write-Host "Fetching regression.json"
$regressionJson = Get-Content -Raw -Path "regression.json" | ConvertFrom-Json
Write-Host "Fetching watermark_regression.json"
$regressionJson = Get-Content -Raw -Path "watermark_regression.json" | ConvertFrom-Json
# Run all the test cases.
Write-Host "Setup complete! Running all tests"
@ -296,16 +268,15 @@ foreach ($testId in $allTests.Keys) {
if ($Test.Values[$tcp].Length -eq 0) { continue }
$transport = $tcp -eq 1 ? "tcp" : "quic"
$json["$testId-$transport"] = $Test.Values[$tcp]
if ($Test.Metric -eq "throughput" -or $Test.Metric -eq "hps") {
$Regression = CheckRegressionTput $Test.Values[$tcp] $testId $transport $regressionJson
if ($Regression -ne "NULL") {
$json["$testId-$transport-regression"] = $Regression
}
} elseif ($Test.Metric -eq "latency") {
$ResultRegression = CheckRegressionResult $Test.Values[$tcp] $testId $transport $regressionJson "$os-$arch-$environment-$io-$tls"
if ($ResultRegression -ne "NULL") {
$json["$testId-$transport-regression"] = $ResultRegression
}
if ($Test.Metric -eq "latency") {
$json["$testId-$transport-lat"] = $Test.Latency[$tcp]
$Regression = CheckRegressionLat $Test.Latency[$tcp] $regressionJson
if ($Regression -ne "NULL") {
$json["$testId-$transport-lat-regression"] = $Regression
$LatencyRegression = CheckRegressionLat $Test.Values[$tcp] $regressionJson $testId $transport "$os-$arch-$environment-$io-$tls"
if ($LatencyRegression -ne "NULL") {
$json["$testId-$transport-lat-regression"] = $LatencyRegression
}
}
}