Retool perf test running.

This change does a few things involving the way we run the perf tests:

- Perf test runner can capture different metrics
  Useful for capturing the new "nanoSecPerIteration" metric.
- Removes the "score" metric
  We'll move to the new time-based metrics. These new metrics are scaled correctly with iteration counts.
- Runs three trials per perf test
  This gives more measurements per test. Each trial is approximately one second. First the perf tests set a fixed number of iterations after calibrating the number of steps that we can run in one second. After that the three trials are run. This should give more stable results.
- Apply more CPU stabilization on Windows
  Use SetPriorityClass to apply more CPU priority. Also upgrade SetThreadPriority to the highest level.
- Always build the Vulkan command buffer test
  This catches build regressions more easily. We still skip the test on non-Android platforms.

Bug: angleproject:2923
Change-Id: I7da234c5af07775ba4a232bb8d65e0138ee7073f
Reviewed-on: https://chromium-review.googlesource.com/c/1330262
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
2018-11-14 16:24:25 -05:00 · 2018-11-14 16:24:25 -05:00 · f3acb8c133
--- a/scripts/perf_test_runner.py
+++ b/scripts/perf_test_runner.py
@ -20,7 +20,8 @@ base_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file_

 # Might have to add lower case "release" in some configurations.
 perftests_paths = glob.glob('out/*Release*')
-metric = 'score'
+metric = 'wall_time'
+max_experiments = 10

 binary_name = 'angle_perftests'
 if sys.platform == 'win32':
@ -95,44 +96,43 @@ if len(sys.argv) >= 2:
 print('Using test executable: ' + perftests_path)
 print('Test name: ' + test_name)

-# Infinite loop of running the tests.
-while True:
-    process = subprocess.Popen([perftests_path, '--gtest_filter=' + test_name], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+def get_results(metric, extra_args=[]):
+    """Run the perf test binary once and collect every value reported for `metric`.
+
+    metric: name of the metric to look for in the test output ("<metric>= <number>").
+    extra_args: extra command-line arguments appended after the gtest filter.
+
+    Returns a list of floats, one per occurrence of the metric in the output.
+    Exits with status 3 if the output reports more than one test, and with
+    status 1 if the metric does not appear in the output at all.
+    """
+    process = subprocess.Popen([perftests_path, '--gtest_filter=' + test_name] + extra_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = process.communicate()

-    start_index = output.find(metric + "=")
-    if start_index == -1:
-        print("Did not find the score of the specified test in output:")
-        print(output)
-        sys.exit(1)
-
-    start_index += len(metric) + 2
-
-    end_index = output[start_index:].find(" ")
-    if end_index == -1:
-        print("Error parsing output:")
-        print(output)
-        sys.exit(2)
-
-    m = re.search('Running (\d+) tests', output)
+    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        print("Found more than one test result in output:")
        print(output)
        sys.exit(3)

-    end_index += start_index
+    pattern = metric + r'= ([0-9.]+)'
+    m = re.findall(pattern, output)
+    # re.findall returns an empty list (never None) when nothing matches.
+    if not m:
+        print("Did not find the metric '%s' in the test output:" % metric)
+        print(output)
+        sys.exit(1)

-    score = int(output[start_index:end_index])
-    sys.stdout.write("score: " + str(score))
+    return [float(value) for value in m]

+# Calibrate the number of steps
+steps = get_results("steps", ["--calibration"])[0]
+print("running with %d steps." % steps)
+
+# Loop 'max_experiments' times, running the tests.
+for experiment in range(max_experiments):
+    experiment_scores = get_results(metric, ["--steps", str(steps)])
+
+    for score in experiment_scores:
+        sys.stdout.write("%s: %.2f" % (metric, score))
        scores.append(score)
-    sys.stdout.write(", mean: %.2f" % mean(scores))

        if (len(scores) > 1):
+            sys.stdout.write(", mean: %.2f" % mean(scores))
            sys.stdout.write(", variation: %.2f%%" % (coefficient_of_variation(scores) * 100.0))

        if (len(scores) > 7):
-        trucation_n = len(scores) >> 3
-        sys.stdout.write(", truncated mean: %.2f" % truncated_mean(scores, trucation_n))
-        sys.stdout.write(", variation: %.2f%%" % (truncated_cov(scores, trucation_n) * 100.0))
+            # One eighth of the sample count (integer division), passed to the truncated statistics.
+            truncation_n = len(scores) >> 3
+            sys.stdout.write(", truncated mean: %.2f" % truncated_mean(scores, truncation_n))
+            sys.stdout.write(", variation: %.2f%%" % (truncated_cov(scores, truncation_n) * 100.0))

--- a/src/tests/BUILD.gn
+++ b/src/tests/BUILD.gn
@ -255,9 +255,6 @@ if (is_win || is_linux || is_android || is_mac) {

    if (angle_enable_vulkan) {
      sources += angle_perf_tests_vulkan_sources
-      if (is_android) {
-        sources += angle_perf_tests_vulkan_command_buffer_sources
-      }
      deps += [ "$angle_root/third_party/glslang:glslang" ]
      public_configs = [ "$angle_root/third_party/glslang:glslang_config" ]
    }
--- a/src/tests/angle_perftests.gni
+++ b/src/tests/angle_perftests.gni
@ -38,12 +38,12 @@ angle_perf_tests_sources = [
  "test_utils/draw_call_perf_utils.cpp",
  "test_utils/draw_call_perf_utils.h",
 ]
-angle_perf_tests_win_sources = [ "perf_tests/IndexDataManagerTest.cpp" ]
-angle_perf_tests_vulkan_sources = [ "perf_tests/VulkanPipelineCachePerf.cpp" ]

-# Currently Vulkan Command Buffer Perf Tests compile on Android/Linux
-angle_perf_tests_vulkan_command_buffer_sources = [
+angle_perf_tests_win_sources = [ "perf_tests/IndexDataManagerTest.cpp" ]
+
+angle_perf_tests_vulkan_sources = [
  "perf_tests/VulkanCommandBufferPerf.cpp",
+  "perf_tests/VulkanPipelineCachePerf.cpp",
  "test_utils/third_party/vulkan_command_buffer_utils.cpp",
  "test_utils/third_party/vulkan_command_buffer_utils.h",
 ]
--- a/src/tests/angle_perftests_main.cpp
+++ b/src/tests/angle_perftests_main.cpp
@ -9,28 +9,11 @@

 #include <gtest/gtest.h>

-extern bool g_OnlyOneRunFrame;
-extern bool gEnableTrace;
-extern const char *gTraceFile;
+void ANGLEProcessPerfTestArgs(int *argc, char **argv);

 int main(int argc, char **argv)
 {
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp("--one-frame-only", argv[i]) == 0)
-        {
-            g_OnlyOneRunFrame = true;
-        }
-        if (strcmp("--enable-trace", argv[i]) == 0)
-        {
-            gEnableTrace = true;
-        }
-        if (strcmp("--trace-file", argv[i]) == 0 && i < argc - 1)
-        {
-            gTraceFile = argv[++i];
-        }
-    }
-
+    ANGLEProcessPerfTestArgs(&argc, argv);
    testing::InitGoogleTest(&argc, argv);
    testing::AddGlobalTestEnvironment(new testing::Environment());
    int rt = RUN_ALL_TESTS();
--- a/src/tests/perf_tests/ANGLEPerfTest.cpp
+++ b/src/tests/perf_tests/ANGLEPerfTest.cpp
@ -17,15 +17,21 @@
 #include <cmath>
 #include <fstream>
 #include <iostream>
+#include <sstream>

 #include <json/json.h>

 namespace
 {
 constexpr size_t kInitialTraceEventBufferSize = 50000;
-constexpr size_t kWarmupIterations            = 3;
 constexpr double kMicroSecondsPerSecond       = 1e6;
 constexpr double kNanoSecondsPerSecond        = 1e9;
+constexpr double kCalibrationRunTimeSeconds   = 1.0;
+constexpr double kMaximumRunTimeSeconds       = 10.0;
+constexpr unsigned int kNumTrials             = 3;
+
+bool gCalibration = false;
+Optional<unsigned int> gStepsToRunOverride;

 struct TraceCategory
 {
@ -154,7 +160,6 @@ ANGLEPerfTest::ANGLEPerfTest(const std::string &name,
    : mName(name),
      mSuffix(suffix),
      mTimer(CreateTimer()),
-      mRunTimeSeconds(2.0),
      mSkipTest(false),
      mNumStepsPerformed(0),
      mIterationsPerStep(iterationsPerStep),
@ -174,18 +179,53 @@ void ANGLEPerfTest::run()
        return;
    }

+    // Calibrate to a fixed number of steps during an initial set time.
+    if (!gStepsToRunOverride.valid())
+    {
+        doRunLoop(kCalibrationRunTimeSeconds);
+
+        // Calibration allows the perf test runner script to save some time.
+        if (gCalibration)
+        {
+            printResult("steps", static_cast<size_t>(mNumStepsPerformed), "count", false);
+            return;
+        }
+
+        gStepsToRunOverride = mNumStepsPerformed;
+    }
+
+    // Do another warmup run. Seems to consistently improve results.
+    doRunLoop(kMaximumRunTimeSeconds);
+
+    for (unsigned int trial = 0; trial < kNumTrials; ++trial)
+    {
+        doRunLoop(kMaximumRunTimeSeconds);
+        printResults();
+    }
+}
+
+void ANGLEPerfTest::doRunLoop(double maxRunTime)
+{
+    mNumStepsPerformed = 0;
+    mRunning           = true;
    mTimer->start();
+
    while (mRunning)
    {
        step();
        if (mRunning)
        {
            ++mNumStepsPerformed;
-        }
-        if (mTimer->getElapsedTime() > mRunTimeSeconds || g_OnlyOneRunFrame)
+            if (mTimer->getElapsedTime() > maxRunTime)
            {
                mRunning = false;
            }
+            else if (gStepsToRunOverride.valid() &&
+                     mNumStepsPerformed >= gStepsToRunOverride.value())
+            {
+                mRunning = false;
+            }
+        }
    }
    finishTest();
    mTimer->stop();
@ -207,11 +247,10 @@ void ANGLEPerfTest::SetUp()

 void ANGLEPerfTest::TearDown()
 {
-    if (mSkipTest)
-    {
-        return;
 }

+void ANGLEPerfTest::printResults()
+{
    double elapsedTimeSeconds = mTimer->getElapsedTime();

    double secondsPerStep      = elapsedTimeSeconds / static_cast<double>(mNumStepsPerformed);
@ -221,16 +260,13 @@ void ANGLEPerfTest::TearDown()
    if (secondsPerIteration > 1e-3)
    {
        double microSecondsPerIteration = secondsPerIteration * kMicroSecondsPerSecond;
-        printResult("microSecPerIteration", microSecondsPerIteration, "us", true);
+        printResult("wall_time", microSecondsPerIteration, "us", true);
    }
    else
    {
        double nanoSecPerIteration = secondsPerIteration * kNanoSecondsPerSecond;
-        printResult("nanoSecPerIteration", nanoSecPerIteration, "ns", true);
+        printResult("wall_time", nanoSecPerIteration, "ns", true);
    }
-
-    double relativeScore = static_cast<double>(mNumStepsPerformed) / elapsedTimeSeconds;
-    printResult("score", static_cast<size_t>(std::round(relativeScore)), "score", true);
 }

 double ANGLEPerfTest::normalizedTime(size_t value) const
@ -269,9 +305,11 @@ ANGLERenderTest::ANGLERenderTest(const std::string &name, const RenderTestParams
      mOSWindow(nullptr)
 {
    // Force fast tests to make sure our slowest bots don't time out.
+    // TODO(jmadill): Remove this flag once rolled into Chromium. http://anglebug.com/2923
    if (g_OnlyOneRunFrame)
    {
        const_cast<RenderTestParams &>(testParams).iterationsPerStep = 1;
+        gStepsToRunOverride                                          = 1;
    }

    // Try to ensure we don't trigger allocation during execution.
@ -341,15 +379,6 @@ void ANGLERenderTest::SetUp()
        abortTest();
        return;
    }
-
-    // Warm up the benchmark to reduce variance.
-    if (!g_OnlyOneRunFrame)
-    {
-        for (size_t iteration = 0; iteration < kWarmupIterations; ++iteration)
-        {
-            drawBenchmark();
-        }
-    }
 }

 void ANGLERenderTest::TearDown()
@ -453,3 +482,47 @@ EGLWindow *ANGLERenderTest::createEGLWindow(const RenderTestParams &testParams)
    return new EGLWindow(testParams.majorVersion, testParams.minorVersion,
                         testParams.eglParameters);
 }
+
+// Consumes the perf-test-specific command-line flags and compacts |argv| in place so that
+// only the arguments not recognized here remain. |*argc| is updated to the remaining count.
+//
+// Recognized flags:
+//   --one-frame-only     sets g_OnlyOneRunFrame and forces a single step per run.
+//   --enable-trace       sets gEnableTrace.
+//   --trace-file <path>  stores the following argument in gTraceFile.
+//   --calibration        sets gCalibration.
+//   --steps <count>      parses the following argument as the step count override.
+//
+// A value-taking flag given as the very last argument is not consumed and stays in |argv|.
+void ANGLEProcessPerfTestArgs(int *argc, char **argv)
+{
+    int argcOutCount = 0;
+
+    for (int argIndex = 0; argIndex < *argc; argIndex++)
+    {
+        if (strcmp("--one-frame-only", argv[argIndex]) == 0)
+        {
+            g_OnlyOneRunFrame   = true;
+            gStepsToRunOverride = 1;
+        }
+        else if (strcmp("--enable-trace", argv[argIndex]) == 0)
+        {
+            gEnableTrace = true;
+        }
+        else if (strcmp("--trace-file", argv[argIndex]) == 0 && argIndex < *argc - 1)
+        {
+            // The file name is the argument that follows the flag, not the flag itself.
+            gTraceFile = argv[argIndex + 1];
+            // Skip an additional argument.
+            argIndex++;
+        }
+        else if (strcmp("--calibration", argv[argIndex]) == 0)
+        {
+            gCalibration = true;
+        }
+        else if (strcmp("--steps", argv[argIndex]) == 0 && argIndex < *argc - 1)
+        {
+            unsigned int stepsToRun = 0;
+            std::stringstream strstr;
+            strstr << argv[argIndex + 1];
+            strstr >> stepsToRun;
+            gStepsToRunOverride = stepsToRun;
+            // Skip an additional argument.
+            argIndex++;
+        }
+        else
+        {
+            // Not one of ours: keep it for the next consumer of the argument list.
+            argv[argcOutCount++] = argv[argIndex];
+        }
+    }
+
+    *argc = argcOutCount;
+}
--- a/src/tests/perf_tests/ANGLEPerfTest.h
+++ b/src/tests/perf_tests/ANGLEPerfTest.h
@ -84,14 +84,16 @@ class ANGLEPerfTest : public testing::Test, angle::NonCopyable
    void abortTest() { mRunning = false; }

    unsigned int getNumStepsPerformed() const { return mNumStepsPerformed; }
+    void doRunLoop(double maxRunTime);

    std::string mName;
    std::string mSuffix;
    Timer *mTimer;
-    double mRunTimeSeconds;
    bool mSkipTest;

  private:
+    void printResults();
+
    unsigned int mNumStepsPerformed;
    unsigned int mIterationsPerStep;
    bool mRunning;
--- a/src/tests/perf_tests/CompilerPerf.cpp
+++ b/src/tests/perf_tests/CompilerPerf.cpp
@ -162,7 +162,7 @@ void main()

 const char *kTrickyESSL300Id = "TrickyESSL300";

-constexpr int kNumIterationsPerStep = 10;
+constexpr int kNumIterationsPerStep = 4;

 struct CompilerPerfParameters final : public angle::CompilerParameters
 {
--- a/src/tests/perf_tests/DrawCallPerf.cpp
+++ b/src/tests/perf_tests/DrawCallPerf.cpp
@ -110,7 +110,6 @@ class DrawCallPerfBenchmark : public ANGLERenderTest,

 DrawCallPerfBenchmark::DrawCallPerfBenchmark() : ANGLERenderTest("DrawCallPerf", GetParam())
 {
-    mRunTimeSeconds = GetParam().runTimeSeconds;
 }

 void DrawCallPerfBenchmark::initializeBenchmark()
--- a/src/tests/perf_tests/DrawElementsPerf.cpp
+++ b/src/tests/perf_tests/DrawElementsPerf.cpp
@ -90,8 +90,6 @@ class DrawElementsPerfBenchmark : public ANGLERenderTest,
 DrawElementsPerfBenchmark::DrawElementsPerfBenchmark()
    : ANGLERenderTest("DrawElementsPerf", GetParam())
 {
-    mRunTimeSeconds = GetParam().runTimeSeconds;
-
    if (GetParam().type == GL_UNSIGNED_INT)
    {
        addExtensionPrerequisite("GL_OES_element_index_uint");
--- a/src/tests/perf_tests/IndexConversionPerf.cpp
+++ b/src/tests/perf_tests/IndexConversionPerf.cpp
@ -72,7 +72,6 @@ IndexConversionPerfTest::IndexConversionPerfTest()
      mVertexBuffer(0),
      mIndexBuffer(0)
 {
-    mRunTimeSeconds = 3.0;
 }

 void IndexConversionPerfTest::initializeBenchmark()
--- a/src/tests/perf_tests/InstancingPerf.cpp
+++ b/src/tests/perf_tests/InstancingPerf.cpp
@ -103,7 +103,6 @@ class InstancingPerfBenchmark : public ANGLERenderTest,
 InstancingPerfBenchmark::InstancingPerfBenchmark()
    : ANGLERenderTest("InstancingPerf", GetParam()), mProgram(0), mNumPoints(75000)
 {
-    mRunTimeSeconds = GetParam().runTimeSeconds;
 }

 void InstancingPerfBenchmark::initializeBenchmark()
--- a/src/tests/perf_tests/VulkanCommandBufferPerf.cpp
+++ b/src/tests/perf_tests/VulkanCommandBufferPerf.cpp
@ -79,6 +79,11 @@ VulkanCommandBufferPerfTest::VulkanCommandBufferPerfTest()
    mCBImplementation = GetParam().CBImplementation;
    mFrames           = GetParam().frames;
    mBuffers          = GetParam().buffers;
+
+// This test appears to be flaky on multiple platforms.
+#if !defined(ANGLE_PLATFORM_ANDROID)
+    mSkipTest = true;
+#endif  // !defined(ANGLE_PLATFORM_ANDROID)
 }

 void VulkanCommandBufferPerfTest::SetUp()
--- a/src/tests/test_utils/third_party/vulkan_command_buffer_utils.cpp
+++ b/src/tests/test_utils/third_party/vulkan_command_buffer_utils.cpp
@ -397,7 +397,7 @@ LRESULT CALLBACK WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam)

 void init_window(struct sample_info &info)
 {
-    WNDCLASSEX win_class;
+    WNDCLASSEXA win_class;
    assert(info.width > 0);
    assert(info.height > 0);

@ -418,7 +418,7 @@ void init_window(struct sample_info &info)
    win_class.lpszClassName = info.name;
    win_class.hIconSm       = LoadIcon(NULL, IDI_WINLOGO);
    // Register window class:
-    if (!RegisterClassEx(&win_class))
+    if (!RegisterClassExA(&win_class))
    {
        // It didn't work, so try to give a useful error:
        printf("Unexpected error trying to start the application!\n");
@ -428,7 +428,7 @@ void init_window(struct sample_info &info)
    // Create window with the registered class:
    RECT wr = {0, 0, info.width, info.height};
    AdjustWindowRect(&wr, WS_OVERLAPPEDWINDOW, FALSE);
-    info.window = CreateWindowEx(0,
+    info.window = CreateWindowExA(0,
                                  info.name,             // class name
                                  info.name,             // app name
                                  WS_OVERLAPPEDWINDOW |  // window style
@ -454,6 +454,7 @@ void destroy_window(struct sample_info &info)
 {
    vkDestroySurfaceKHR(info.inst, info.surface, NULL);
    DestroyWindow(info.window);
+    UnregisterClassA(info.name, GetModuleHandle(NULL));
 }

 #elif defined(__ANDROID__)
@ -846,7 +847,16 @@ void init_swap_chain(struct sample_info &info, VkImageUsageFlags usageFlags)

    // The FIFO present mode is guaranteed by the spec to be supported
    // Also note that current Android driver only supports FIFO
-    VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+    VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR;
+
+    for (uint32_t presentModeIndex = 0; presentModeIndex < presentModeCount; ++presentModeIndex)
+    {
+        if (presentModes[presentModeIndex] == VK_PRESENT_MODE_IMMEDIATE_KHR)
+        {
+            swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+            break;
+        }
+    }

    // Determine the number of VkImage's to use in the swap chain.
    // We need to acquire only 1 presentable image at at time.
--- a/util/windows/win32/Win32_system_utils.cpp
+++ b/util/windows/win32/Win32_system_utils.cpp
@ -25,7 +25,11 @@ bool StabilizeCPUForBenchmarking()
    {
        return false;
    }
-    if (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST) == FALSE)
+    if (SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) == FALSE)
+    {
+        return false;
+    }
+    if (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL) == FALSE)
    {
        return false;
    }