Vulkan: Skip empty submissions

A number of places in ANGLE perform an implicit flush: eglSwapBuffers(),
glFenceSync(), etc.  Sometimes these flushes are unnecessary because
there is nothing to submit.  Additionally, an application may
unnecessarily issue glFlush() with nothing recorded.

In this change, empty command buffers are no longer submitted,
optimizing these unnecessary flushes away.

Bug: angleproject:7084
Change-Id: Iecb865b6b9ef8045dfecda7b5221874f7031b42e
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3600837
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Amirali Abdolrashidi <abdolrashidi@google.com>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
This commit is contained in:
Shahbaz Youssefi 2022-04-21 23:13:02 -04:00 коммит произвёл Angle LUCI CQ
Родитель b07a115690
Коммит 2db718edf2
8 изменённых файлов: 211 добавлений и 82 удалений

Просмотреть файл

@ -131,7 +131,6 @@ struct PerfMonitorTriplet
};
#define ANGLE_VK_PERF_COUNTERS_X(FN) \
FN(primaryBuffers) \
FN(renderPasses) \
FN(submittedCommands) \
FN(writeDescriptorSets) \

Просмотреть файл

@ -81,6 +81,27 @@ ANGLE_MAYBE_UNUSED void ResetSecondaryCommandBuffers<std::vector<VulkanSecondary
}
commandBuffers->clear();
}
// Count the number of batches with serial <= given serial. A reference to the fence of the last
// batch with a valid fence is returned for waiting purposes. Note that due to empty submissions
// being optimized out, there may not be a fence associated with every batch.
size_t GetBatchCountUpToSerial(std::vector<CommandBatch> &inFlightCommands,
Serial serial,
Shared<Fence> **fenceToWaitOnOut)
{
size_t batchCount = 0;
while (batchCount < inFlightCommands.size() && inFlightCommands[batchCount].serial <= serial)
{
if (inFlightCommands[batchCount].fence.isReferenced())
{
*fenceToWaitOnOut = &inFlightCommands[batchCount].fence;
}
batchCount++;
}
return batchCount;
}
} // namespace
angle::Result FenceRecycler::newSharedFence(vk::Context *context,
@ -828,7 +849,8 @@ angle::Result CommandProcessor::ensureNoPendingWork(Context *context)
}
// CommandQueue implementation.
CommandQueue::CommandQueue() : mCurrentQueueSerial(mQueueSerialFactory.generate()) {}
CommandQueue::CommandQueue() : mCurrentQueueSerial(mQueueSerialFactory.generate()), mPerfCounters{}
{}
CommandQueue::~CommandQueue() = default;
@ -886,12 +908,18 @@ angle::Result CommandQueue::checkCompletedCommands(Context *context)
for (CommandBatch &batch : mInFlightCommands)
{
VkResult result = batch.fence.get().getStatus(device);
if (result == VK_NOT_READY)
// For empty submissions, fence is not set but there may be garbage to be collected. In
// such a case, the empty submission is "completed" at the same time as the last submission
// that actually happened.
if (batch.fence.isReferenced())
{
break;
VkResult result = batch.fence.get().getStatus(device);
if (result == VK_NOT_READY)
{
break;
}
ANGLE_VK_TRY(context, result);
}
ANGLE_VK_TRY(context, result);
++finishedCount;
}
@ -915,11 +943,18 @@ angle::Result CommandQueue::retireFinishedCommands(Context *context, size_t fini
CommandBatch &batch = mInFlightCommands[commandIndex];
mLastCompletedQueueSerial = batch.serial;
mFenceRecycler.resetSharedFence(&batch.fence);
ANGLE_TRACE_EVENT0("gpu.angle", "command buffer recycling");
if (batch.fence.isReferenced())
{
mFenceRecycler.resetSharedFence(&batch.fence);
}
if (batch.primaryCommands.valid())
{
ANGLE_TRACE_EVENT0("gpu.angle", "Primary command buffer recycling");
PersistentCommandPool &commandPool = getCommandPool(batch.hasProtectedContent);
ANGLE_TRY(commandPool.collect(context, std::move(batch.primaryCommands)));
}
ANGLE_TRACE_EVENT0("gpu.angle", "Secondary command buffer recycling");
batch.resetSecondaryCommandBuffers(device);
PersistentCommandPool &commandPool = getCommandPool(batch.hasProtectedContent);
ANGLE_TRY(commandPool.collect(context, std::move(batch.primaryCommands)));
}
if (finishedCount > 0)
@ -982,16 +1017,23 @@ void CommandQueue::handleDeviceLost(RendererVk *renderer)
for (CommandBatch &batch : mInFlightCommands)
{
// On device loss we need to wait for fence to be signaled before destroying it
VkResult status = batch.fence.get().wait(device, renderer->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
if (batch.fence.isReferenced())
{
VkResult status = batch.fence.get().wait(device, renderer->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
batch.fence.reset(device);
}
// On device lost, simply destroy the CommandBuffer here; it will be fully cleared later
// by CommandPool::destroy
batch.primaryCommands.destroy(device);
if (batch.primaryCommands.valid())
{
batch.primaryCommands.destroy(device);
}
batch.resetSecondaryCommandBuffers(device);
batch.fence.reset(device);
}
mInFlightCommands.clear();
}
@ -1013,28 +1055,26 @@ angle::Result CommandQueue::finishToSerial(Context *context, Serial finishSerial
// Find the serial in the list. The serials should be in order.
ASSERT(CommandsHaveValidOrdering(mInFlightCommands));
size_t finishedCount = 0;
while (finishedCount < mInFlightCommands.size() &&
mInFlightCommands[finishedCount].serial <= finishSerial)
{
finishedCount++;
}
Shared<Fence> *fenceToWaitOn = nullptr;
size_t finishCount = GetBatchCountUpToSerial(mInFlightCommands, finishSerial, &fenceToWaitOn);
if (finishedCount == 0)
if (finishCount == 0)
{
return angle::Result::Continue;
}
const CommandBatch &batch = mInFlightCommands[finishedCount - 1];
// Wait for it to finish. If there is no fence, the serial is already finished; it might just
// have garbage to clean up.
if (fenceToWaitOn != nullptr)
{
VkDevice device = context->getDevice();
VkResult status = fenceToWaitOn->get().wait(device, timeout);
// Wait for it finish
VkDevice device = context->getDevice();
VkResult status = batch.fence.get().wait(device, timeout);
ANGLE_VK_TRY(context, status);
ANGLE_VK_TRY(context, status);
}
// Clean up finished batches.
ANGLE_TRY(retireFinishedCommands(context, finishedCount));
ANGLE_TRY(retireFinishedCommands(context, finishCount));
ASSERT(allInFlightCommandsAreAfterSerial(finishSerial));
return angle::Result::Continue;
@ -1064,38 +1104,48 @@ angle::Result CommandQueue::submitFrame(
SecondaryCommandPools *commandPools,
Serial submitQueueSerial)
{
// Start an empty primary buffer if we have an empty submit.
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, hasProtectedContent));
ANGLE_VK_TRY(context, commandBuffer.end());
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, commandBuffer, waitSemaphores, waitSemaphoreStageMasks,
signalSemaphore);
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (hasProtectedContent)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitFrame");
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
DeviceScoped<CommandBatch> scopedBatch(device);
CommandBatch &batch = scopedBatch.get();
ANGLE_TRY(mFenceRecycler.newSharedFence(context, &batch.fence));
batch.serial = submitQueueSerial;
batch.hasProtectedContent = hasProtectedContent;
batch.commandBuffersToReset = std::move(commandBuffersToReset);
ANGLE_TRY(queueSubmit(context, priority, submitInfo, &batch.fence.get(), batch.serial));
// Don't make a submission if there is nothing to submit.
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
const bool hasAnyPendingCommands = commandBuffer.valid();
if (hasAnyPendingCommands || signalSemaphore != nullptr || !waitSemaphores.empty())
{
if (commandBuffer.valid())
{
ANGLE_VK_TRY(context, commandBuffer.end());
}
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, commandBuffer, waitSemaphores, waitSemaphoreStageMasks,
signalSemaphore);
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (hasProtectedContent)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitFrame");
ANGLE_TRY(mFenceRecycler.newSharedFence(context, &batch.fence));
ANGLE_TRY(queueSubmit(context, priority, submitInfo, &batch.fence.get(), batch.serial));
}
else
{
mLastSubmittedQueueSerial = batch.serial;
}
if (!currentGarbage.empty())
{
@ -1135,39 +1185,35 @@ angle::Result CommandQueue::waitForSerialWithUserTimeout(vk::Context *context,
uint64_t timeout,
VkResult *result)
{
// No in-flight work. This indicates the serial is already complete.
if (mInFlightCommands.empty())
Shared<Fence> *fenceToWaitOn = nullptr;
size_t finishCount = GetBatchCountUpToSerial(mInFlightCommands, serial, &fenceToWaitOn);
// The serial is already complete if:
//
// - There is no in-flight work (i.e. mInFlightCommands is empty), or
// - The given serial is smaller than the smallest serial, or
// - Every batch up to this serial is a garbage-clean-up-only batch (i.e. empty submission
// that's optimized out)
if (finishCount == 0 || fenceToWaitOn == nullptr)
{
*result = VK_SUCCESS;
return angle::Result::Continue;
}
// Serial is already complete.
if (serial < mInFlightCommands[0].serial)
{
*result = VK_SUCCESS;
return angle::Result::Continue;
}
size_t batchIndex = 0;
while (batchIndex != mInFlightCommands.size() && mInFlightCommands[batchIndex].serial < serial)
{
batchIndex++;
}
const CommandBatch &batch = mInFlightCommands[finishCount - 1];
// Serial is not yet submitted. This is undefined behaviour, so we can do anything.
if (batchIndex >= mInFlightCommands.size())
if (serial > batch.serial)
{
ASSERT(finishCount == mInFlightCommands.size());
WARN() << "Waiting on an unsubmitted serial.";
*result = VK_TIMEOUT;
return angle::Result::Continue;
}
ASSERT(serial == batch.serial);
ASSERT(serial == mInFlightCommands[batchIndex].serial);
vk::Fence &fence = mInFlightCommands[batchIndex].fence.get();
ASSERT(fence.valid());
*result = fence.wait(context->getDevice(), timeout);
*result = fenceToWaitOn->get().wait(context->getDevice(), timeout);
// Don't trigger an error on timeout.
if (*result != VK_TIMEOUT)
@ -1278,6 +1324,8 @@ angle::Result CommandQueue::queueSubmit(Context *context,
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, fenceHandle));
mLastSubmittedQueueSerial = submitQueueSerial;
++mPerfCounters.submittedCommands;
// Now that we've submitted work, clean up RendererVk garbage
return renderer->cleanupGarbage(mLastCompletedQueueSerial);
}

Просмотреть файл

@ -425,6 +425,8 @@ class CommandQueue final : public CommandQueueInterface
VkQueue getQueue(egl::ContextPriority priority) { return mQueueMap[priority]; }
const angle::VulkanPerfCounters &getPerfCounters() const { return mPerfCounters; }
private:
void releaseToCommandBatch(bool hasProtectedContent,
PrimaryCommandBuffer &&commandBuffer,
@ -479,6 +481,8 @@ class CommandQueue final : public CommandQueueInterface
DeviceQueueMap mQueueMap;
FenceRecycler mFenceRecycler;
angle::VulkanPerfCounters mPerfCounters;
};
// CommandProcessor is used to dispatch work to the GPU when the asyncCommandQueue feature is
@ -568,6 +572,13 @@ class CommandProcessor final : public Context, public CommandQueueInterface
uint32_t getDeviceQueueIndex() const { return mCommandQueue.getDeviceQueueIndex(); }
VkQueue getQueue(egl::ContextPriority priority) { return mCommandQueue.getQueue(priority); }
// Forwards to the perf counters of the member command queue.  This class also has a set of perf
// counters through its inheritance from vk::Context, but currently only the member command
// queue's counters are of interest.
const angle::VulkanPerfCounters &getPerfCounters() const { return mCommandQueue.getPerfCounters(); }
private:
bool hasPendingError() const
{

Просмотреть файл

@ -760,6 +760,7 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mRenderPassCommands(nullptr),
mQueryEventType(GraphicsEventCmdBuf::NotInQueryCmd),
mGpuEventsEnabled(false),
mPrimaryBufferEventCounter(0),
mHasDeferredFlush(false),
mGpuClockSync{std::numeric_limits<double>::max(), std::numeric_limits<double>::max()},
mGpuEventTimestampOrigin(0),
@ -1119,9 +1120,7 @@ angle::Result ContextVk::initialize()
vk::kDefaultTimestampQueryPoolSize));
ANGLE_TRY(synchronizeCpuGpuTime());
mPerfCounters.primaryBuffers++;
EventName eventName = GetTraceEventName("Primary", mPerfCounters.primaryBuffers);
EventName eventName = GetTraceEventName("Primary", mPrimaryBufferEventCounter);
ANGLE_TRY(traceGpuEvent(&mOutsideRenderPassCommands->getCommandBuffer(),
TRACE_EVENT_PHASE_BEGIN, eventName));
}
@ -1181,10 +1180,15 @@ angle::Result ContextVk::finish(const gl::Context *context)
if (mRenderer->getFeatures().swapbuffersOnFlushOrFinishWithSingleBuffer.enabled &&
(mCurrentWindowSurface != nullptr) && mCurrentWindowSurface->isSharedPresentMode())
{
return mCurrentWindowSurface->onSharedPresentContextFlush(context);
ANGLE_TRY(mCurrentWindowSurface->onSharedPresentContextFlush(context));
}
else
{
ANGLE_TRY(finishImpl(RenderPassClosureReason::GLFinish));
}
return finishImpl(RenderPassClosureReason::GLFinish);
syncObjectPerfCounters();
return angle::Result::Continue;
}
angle::Result ContextVk::setupDraw(const gl::Context *context,
@ -2524,6 +2528,9 @@ void ContextVk::syncObjectPerfCounters()
{
mPerfCounters.descriptorSetCacheTotalSize += driverSet.descriptorSetCache.getSize();
}
// Update perf counters from the renderer as well
mPerfCounters.submittedCommands = mRenderer->getCommandQueuePerfCounters().submittedCommands;
}
void ContextVk::updateOverlayOnPresent()
@ -2687,7 +2694,6 @@ angle::Result ContextVk::submitCommands(const vk::Semaphore *signalSemaphore,
ANGLE_TRY(checkCompletedGpuEvents());
}
mPerfCounters.submittedCommands++;
resetTotalBufferToImageCopySize();
return angle::Result::Continue;
@ -6127,7 +6133,7 @@ angle::Result ContextVk::flushAndGetSerial(const vk::Semaphore *signalSemaphore,
if (mGpuEventsEnabled)
{
EventName eventName = GetTraceEventName("Primary", mPerfCounters.primaryBuffers);
EventName eventName = GetTraceEventName("Primary", mPrimaryBufferEventCounter);
ANGLE_TRY(traceGpuEvent(&mOutsideRenderPassCommands->getCommandBuffer(),
TRACE_EVENT_PHASE_END, eventName));
}
@ -6157,11 +6163,9 @@ angle::Result ContextVk::flushAndGetSerial(const vk::Semaphore *signalSemaphore,
ASSERT(mWaitSemaphores.empty());
ASSERT(mWaitSemaphoreStageMasks.empty());
mPerfCounters.primaryBuffers++;
if (mGpuEventsEnabled)
{
EventName eventName = GetTraceEventName("Primary", mPerfCounters.primaryBuffers);
EventName eventName = GetTraceEventName("Primary", ++mPrimaryBufferEventCounter);
ANGLE_TRY(traceGpuEvent(&mOutsideRenderPassCommands->getCommandBuffer(),
TRACE_EVENT_PHASE_BEGIN, eventName));
}

Просмотреть файл

@ -1327,6 +1327,8 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
std::vector<GpuEventQuery> mInFlightGpuEventQueries;
// A list of gpu events since the last clock sync.
std::vector<GpuEvent> mGpuEvents;
// The current frame index, used to generate a submission-encompassing event tagged with it.
uint32_t mPrimaryBufferEventCounter;
// Cached value of the color attachment mask of the current draw framebuffer. This is used to
// know which attachment indices have their blend state set in |mGraphicsPipelineDesc|, and

Просмотреть файл

@ -393,6 +393,19 @@ class RendererVk : angle::NonCopyable
}
}
// Returns a by-value copy of the command queue perf counters.  A ScopedCommandQueueLock on
// mCommandQueueMutex is in scope for the whole function.  The counters come from the command
// processor when the async command queue is enabled, and from the command queue otherwise.
angle::VulkanPerfCounters getCommandQueuePerfCounters()
{
    vk::ScopedCommandQueueLock lock(this, mCommandQueueMutex);

    const angle::VulkanPerfCounters &activeCounters = isAsyncCommandQueueEnabled()
                                                          ? mCommandProcessor.getPerfCounters()
                                                          : mCommandQueue.getPerfCounters();
    // The copy is made by the return statement, before |lock| goes out of scope.
    return activeCounters;
}
egl::Display *getDisplay() const { return mDisplay; }
VkResult getLastPresentResult(VkSwapchainKHR swapchain)

Просмотреть файл

@ -1922,7 +1922,7 @@ angle::Result WindowSurfaceVk::doDeferredAcquireNextImage(const gl::Context *con
//
// - When EGL_BUFFER_DESTROYED is specified, the contents of the color image can be
// invalidated.
// * This is disabled with buffer age has been queried to work around a dEQP test bug.
// * This is disabled when buffer age has been queried to work around a dEQP test bug.
// - Depth/Stencil can always be invalidated
//
// In all cases, when the present mode is DEMAND_REFRESH, swap is implicit and the swap behavior

Просмотреть файл

@ -4910,6 +4910,58 @@ TEST_P(VulkanPerformanceCounterTest, InvalidateThenRepeatedClearThenReadbackThen
compareColorOpCounters(getPerfCounters(), expected);
}
// Verify that glFlush does not result in a vkQueueSubmit when nothing has been recorded.
TEST_P(VulkanPerformanceCounterTest, UnnecessaryFlushDoesntCauseSubmission)
{
    ANGLE_SKIP_TEST_IF(!IsGLExtensionEnabled(kPerfMonitorExtensionName));

    initANGLEFeatures();
    swapBuffers();

    // Issues three back-to-back flushes with no GL commands in between.
    auto flushRepeatedly = []() {
        for (int flushIndex = 0; flushIndex < 3; ++flushIndex)
        {
            glFlush();
        }
    };

    uint32_t submissionsSoFar = getPerfCounters().submittedCommands;

    // Nothing has been recorded yet, so neither the flushes nor the finish should submit.
    flushRepeatedly();
    glFinish();
    EXPECT_EQ(getPerfCounters().submittedCommands, submissionsSoFar);

    // Record a clear and read it back.
    glClearColor(1, 0, 0, 1);
    glClear(GL_COLOR_BUFFER_BIT);
    EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::red);

    // Exactly one submission is expected for the readback above.
    ++submissionsSoFar;
    glFinish();
    EXPECT_EQ(getPerfCounters().submittedCommands, submissionsSoFar);

    // No additional submissions since the last one.
    flushRepeatedly();
    glFinish();
    EXPECT_EQ(getPerfCounters().submittedCommands, submissionsSoFar);
}
// Verify that glFenceSync does not result in a vkQueueSubmit when nothing has been recorded.
TEST_P(VulkanPerformanceCounterTest, SyncWihtoutCommandsDoesntCauseSubmission)
{
    ANGLE_SKIP_TEST_IF(!IsGLExtensionEnabled(kPerfMonitorExtensionName));

    initANGLEFeatures();
    swapBuffers();

    // Snapshot the submission count before inserting the fence.
    const uint32_t submissionsBeforeSync = getPerfCounters().submittedCommands;

    glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);

    // No commands were recorded, so the fence and the finish should not add a submission.
    glFinish();
    EXPECT_EQ(getPerfCounters().submittedCommands, submissionsBeforeSync);
}
ANGLE_INSTANTIATE_TEST(VulkanPerformanceCounterTest, ES3_VULKAN(), ES3_VULKAN_SWIFTSHADER());
ANGLE_INSTANTIATE_TEST(VulkanPerformanceCounterTest_ES31, ES31_VULKAN(), ES31_VULKAN_SWIFTSHADER());
ANGLE_INSTANTIATE_TEST(VulkanPerformanceCounterTest_MSAA, ES3_VULKAN(), ES3_VULKAN_SWIFTSHADER());