Vulkan: Remaining disjoint timer query functions

The following features were missing in c2b576d9e: - glGetIntegerv with GL_GPU_DISJOINT_EXT: this is currently impossible to query in Vulkan, so 0 is always returned. - glGetIntegerv with GL_TIMESTAMP_EXT: this is a way to query GPU timestamp without performing flushes or waiting for the GPU to finish. There is no direct correspondence in Vulkan; it's implemented by making a small submission, with no dependency on other submissions, in which there is only a timestamp query. Bug: angleproject:2885 Change-Id: I2341bd610db9084c26b6421c6f8949950ffa4de8 Reviewed-on: https://chromium-review.googlesource.com/c/1299873 Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org> Reviewed-by: Geoff Lang <geofflang@chromium.org>
2018-10-25 12:48:49 -04:00 · 2018-10-25 12:48:49 -04:00 · 749589f8b3
--- a/BUILD.gn
+++ b/BUILD.gn
@ -570,7 +570,7 @@ static_library("libANGLE") {
    }
    if (is_android) {
      sources += libangle_vulkan_android_sources
-      libs += ["vulkan"]
+      libs += [ "vulkan" ]
    }
    deps += [ ":angle_vulkan" ]
    public_deps += [ "$angle_root/third_party/vulkan-headers:vulkan_headers" ]
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@ -204,7 +204,6 @@ angle::Result ContextVk::initialize()
                                                         vk::kDefaultTimestampQueryPoolSize));
    ANGLE_TRY(mQueryPools[gl::QueryType::TimeElapsed].init(this, VK_QUERY_TYPE_TIMESTAMP,
                                                           vk::kDefaultTimestampQueryPoolSize));
-    // TODO(syoussefi): Initialize other query pools as they get implemented.

    size_t minAlignment = static_cast<size_t>(
        mRenderer->getPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment);
@ -898,14 +897,17 @@ angle::Result ContextVk::syncState(const gl::Context *context,

 GLint ContextVk::getGPUDisjoint()
 {
-    UNIMPLEMENTED();
-    return GLint();
+    // No extension seems to be available to query this information.
+    return 0;
 }

 GLint64 ContextVk::getTimestamp()
 {
-    UNIMPLEMENTED();
-    return GLint64();
+    uint64_t timestamp = 0;
+
+    (void)mRenderer->getTimestamp(this, &timestamp);
+
+    return static_cast<GLint64>(timestamp);
 }

 angle::Result ContextVk::onMakeCurrent(const gl::Context *context)
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@ -895,15 +895,18 @@ angle::Result RendererVk::finish(vk::Context *context)

    if (mGpuEventsEnabled)
    {
-        // Recalculate the CPU/GPU time difference to account for clock drifting.  Note that
-        // currently, the perftest event handler does not correctly handle out of order gpu and sync
-        // events, so make sure all gpu events are completed.  This loop should in practice execute
-        // once since the queue is already idle.
+        // This loop should in practice execute once since the queue is already idle.
        while (mInFlightGpuEventQueries.size() > 0)
        {
            ANGLE_TRY(checkCompletedGpuEvents(context));
        }
-        ANGLE_TRY(synchronizeCpuGpuTime(context));
+        // Recalculate the CPU/GPU time difference to account for clock drifting.  Avoid unnecessary
+        // synchronization if there is no event to be adjusted (happens when finish() gets called
+        // multiple times towards the end of the application).
+        if (mGpuEvents.size() > 0)
+        {
+            ANGLE_TRY(synchronizeCpuGpuTime(context));
+        }
    }

    return angle::Result::Continue();
@ -1007,9 +1010,9 @@ angle::Result RendererVk::submitFrame(vk::Context *context,
    // Reallocate the command pool for next frame.
    // TODO(jmadill): Consider reusing command pools.
    VkCommandPoolCreateInfo poolInfo = {};
-    poolInfo.sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+    poolInfo.sType                   = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    poolInfo.flags                   = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT;
-    poolInfo.queueFamilyIndex = mCurrentQueueFamilyIndex;
+    poolInfo.queueFamilyIndex        = mCurrentQueueFamilyIndex;

    return mCommandPool.init(context, poolInfo);
 }
@ -1236,6 +1239,106 @@ vk::ShaderLibrary *RendererVk::getShaderLibrary()
    return &mShaderLibrary;
 }

+angle::Result RendererVk::getTimestamp(vk::Context *context, uint64_t *timestampOut)
+{
+    // The intent of this function is to query the timestamp without stalling the GPU.  Currently,
+    // that seems impossible, so instead, we are going to make a small submission with just a
+    // timestamp query.  First, the disjoint timer query extension says:
+    //
+    // > This will return the GL time after all previous commands have reached the GL server but
+    // have not yet necessarily executed.
+    //
+    // The previous commands are stored in the command graph at the moment and are not yet flushed.
+    // The wording allows us to make a submission to get the timestamp without performing a flush.
+    //
+    // Second:
+    //
+    // > By using a combination of this synchronous get command and the asynchronous timestamp query
+    // object target, applications can measure the latency between when commands reach the GL server
+    // and when they are realized in the framebuffer.
+    //
+    // This fits with the above strategy as well, although inevitably we are possibly introducing a
+    // GPU bubble.  This function directly generates a command buffer and submits it instead of
+    // using the other member functions.  This is to avoid changing any state, such as the queue
+    // serial.
+
+    // Create a query used to receive the GPU timestamp
+    vk::Scoped<vk::DynamicQueryPool> timestampQueryPool(mDevice);
+    vk::QueryHelper timestampQuery;
+    ANGLE_TRY(timestampQueryPool.get().init(context, VK_QUERY_TYPE_TIMESTAMP, 1));
+    ANGLE_TRY(timestampQueryPool.get().allocateQuery(context, &timestampQuery));
+
+    // Record the command buffer
+    vk::Scoped<vk::CommandBuffer> commandBatch(mDevice);
+    vk::CommandBuffer &commandBuffer = commandBatch.get();
+
+    VkCommandBufferAllocateInfo commandBufferInfo = {};
+    commandBufferInfo.sType                       = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+    commandBufferInfo.commandPool                 = mCommandPool.getHandle();
+    commandBufferInfo.level                       = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    commandBufferInfo.commandBufferCount          = 1;
+
+    ANGLE_TRY(commandBuffer.init(context, commandBufferInfo));
+
+    VkCommandBufferBeginInfo beginInfo = {};
+    beginInfo.sType                    = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+    beginInfo.flags                    = 0;
+    beginInfo.pInheritanceInfo         = nullptr;
+
+    ANGLE_TRY(commandBuffer.begin(context, beginInfo));
+
+    commandBuffer.resetQueryPool(timestampQuery.getQueryPool()->getHandle(),
+                                 timestampQuery.getQuery(), 1);
+    commandBuffer.writeTimestamp(VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                 timestampQuery.getQueryPool()->getHandle(),
+                                 timestampQuery.getQuery());
+
+    ANGLE_TRY(commandBuffer.end(context));
+
+    // Create fence for the submission
+    VkFenceCreateInfo fenceInfo = {};
+    fenceInfo.sType             = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    fenceInfo.flags             = 0;
+
+    vk::Scoped<vk::Fence> fence(mDevice);
+    ANGLE_TRY(fence.get().init(context, fenceInfo));
+
+    // Submit the command buffer
+    VkSubmitInfo submitInfo         = {};
+    submitInfo.sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submitInfo.waitSemaphoreCount   = 0;
+    submitInfo.pWaitSemaphores      = nullptr;
+    submitInfo.pWaitDstStageMask    = nullptr;
+    submitInfo.commandBufferCount   = 1;
+    submitInfo.pCommandBuffers      = commandBuffer.ptr();
+    submitInfo.signalSemaphoreCount = 0;
+    submitInfo.pSignalSemaphores    = nullptr;
+
+    ANGLE_VK_TRY(context, vkQueueSubmit(mQueue, 1, &submitInfo, fence.get().getHandle()));
+
+    // Wait for the submission to finish.  Given no semaphores, there is hope that it would execute
+    // in parallel with what's already running on the GPU.
+    constexpr uint64_t kMaxFenceWaitTimeNs = 10'000'000'000llu;
+    angle::Result result                   = fence.get().wait(context, kMaxFenceWaitTimeNs);
+    if (result == angle::Result::Incomplete())
+    {
+        // Declare it a failure if it times out.
+        result = angle::Result::Stop();
+    }
+    ANGLE_TRY(result);
+
+    // Get the query results
+    constexpr VkQueryResultFlags queryFlags = VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT;
+
+    ANGLE_TRY(timestampQuery.getQueryPool()->getResults(context, timestampQuery.getQuery(), 1,
+                                                        sizeof(*timestampOut), timestampOut,
+                                                        sizeof(*timestampOut), queryFlags));
+
+    timestampQueryPool.get().freeQuery(context, &timestampQuery);
+
+    return angle::Result::Continue();
+}
+
 angle::Result RendererVk::synchronizeCpuGpuTime(vk::Context *context)
 {
    ASSERT(mGpuEventsEnabled);
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@ -173,6 +173,8 @@ class RendererVk : angle::NonCopyable
    vk::ShaderLibrary *getShaderLibrary();
    const FeaturesVk &getFeatures() const { return mFeatures; }

+    angle::Result getTimestamp(vk::Context *context, uint64_t *timestampOut);
+
    // Create Begin/End/Instant GPU trace events, which take their timestamps from GPU queries.
    // The events are queued until the query results are available.  Possible values for `phase`
    // are TRACE_EVENT_PHASE_*