Vulkan: Restart RenderPasses in DS read-only mode.

We can combine an initial RenderPass with a read-only RP if the first
RP never writes to depth. We can check the depth-write tracking in
CommandBufferHelper and substitute in a new Framebuffer/RP Desc in this
case as well as issue new layout barriers. We need to disable barrier
merging in this special case.

This reduces the RenderPass count in the Manhattan trace from 15->13.
The performance on the Pixel 4 benchmark goes to ~82% of native for
the on-screen version and ~88% for off-screen. There's also a ~5% bump
in speed for the desktop trace.

Bug: angleproject:4959
Change-Id: I70241824f75eaa1e11b50370f826abc36e91686e
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2358772
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Tim Van Patten <timvp@google.com>
Reviewed-by: Charlie Lao <cclao@google.com>
This commit is contained in:
Jamie Madill 2020-08-15 17:26:04 -04:00 коммит произвёл Commit Bot
Родитель 552f0f7623
Коммит d3e800e9ad
7 изменённых файлов: 204 добавлений и 9 удалений

Просмотреть файл

@ -2999,6 +2999,7 @@ angle::Result ContextVk::syncState(const gl::Context *context,
gl::Framebuffer *drawFramebuffer = glState.getDrawFramebuffer();
mDrawFramebuffer = vk::GetImpl(drawFramebuffer);
mDrawFramebuffer->setReadOnlyDepthMode(false);
updateFlipViewportDrawFramebuffer(glState);
updateSurfaceRotationDrawFramebuffer(glState);
updateViewport(mDrawFramebuffer, glState.getViewport(), glState.getNearPlane(),
@ -3896,6 +3897,11 @@ angle::Result ContextVk::updateActiveTextures(const gl::Context *context)
}
else if (shouldSwitchToDepthReadOnlyMode(context, texture))
{
// The "readOnlyDepthMode" feature enables read-only depth-stencil feedback loops. We
// only switch to "read-only" mode when there's a loop. We track the depth-stencil access
// mode in the RenderPass. The tracking tells us when we can retroactively go back and
// change the RenderPass to read-only. If there are any writes we need to break and
// finish the current RP before starting the read-only one.
ASSERT(!mState.isDepthWriteEnabled());
// Special handling for deferred clears.
@ -3906,11 +3912,19 @@ angle::Result ContextVk::updateActiveTextures(const gl::Context *context)
ANGLE_TRY(mDrawFramebuffer->flushDeferredClears(this, scissoredRenderArea));
}
// TODO(jmadill): Don't end RenderPass. http://anglebug.com/4959
if (hasStartedRenderPass())
{
ANGLE_TRY(flushCommandsAndEndRenderPass());
if (mRenderPassCommands->getDepthStartAccess() == vk::ResourceAccess::Write)
{
ANGLE_TRY(flushCommandsAndEndRenderPass());
}
else
{
ANGLE_TRY(mDrawFramebuffer->restartRenderPassInReadOnlyDepthMode(
this, mRenderPassCommands));
}
}
mDrawFramebuffer->setReadOnlyDepthMode(true);
}
@ -4846,7 +4860,7 @@ void ContextVk::setDefaultUniformBlocksMinSizeForTesting(size_t minSize)
angle::Result ContextVk::updateRenderPassDepthAccess()
{
if (mState.isDepthTestEnabled() && mRenderPassCommands->started())
if (mState.isDepthTestEnabled() && hasStartedRenderPass())
{
vk::ResourceAccess access = GetDepthAccess(mState.getDepthStencilState());

Просмотреть файл

@ -2503,4 +2503,19 @@ void FramebufferVk::setReadOnlyDepthMode(bool readOnlyDepthEnabled)
updateRenderPassDesc();
}
}
// Switches this framebuffer into read-only depth mode and re-targets the RenderPass held by
// |renderPass| at the framebuffer and RenderPass description that go with that mode.
// Must only be called while the framebuffer is not already in read-only depth mode.
// Returns angle::Result::Continue on success; ANGLE_TRY propagates a getFramebuffer() failure.
angle::Result FramebufferVk::restartRenderPassInReadOnlyDepthMode(
    ContextVk *contextVk,
    vk::CommandBufferHelper *renderPass)
{
    ASSERT(!isReadOnlyDepthMode());

    // Flip the mode before fetching the framebuffer and reading mRenderPassDesc:
    // setReadOnlyDepthMode() may refresh the RenderPass description.
    setReadOnlyDepthMode(true);

    vk::Framebuffer *readOnlyFramebuffer = nullptr;
    ANGLE_TRY(getFramebuffer(contextVk, &readOnlyFramebuffer, nullptr));

    renderPass->restartRenderPassWithReadOnlyDepth(*readOnlyFramebuffer, mRenderPassDesc);
    return angle::Result::Continue;
}
} // namespace rx

Просмотреть файл

@ -139,6 +139,8 @@ class FramebufferVk : public FramebufferImpl
bool hasDeferredClears() const { return !mDeferredClears.empty(); }
angle::Result flushDeferredClears(ContextVk *contextVk, const gl::Rectangle &renderArea);
void setReadOnlyDepthMode(bool readOnlyDepthEnabled);
angle::Result restartRenderPassInReadOnlyDepthMode(ContextVk *contextVk,
vk::CommandBufferHelper *renderPass);
private:
FramebufferVk(RendererVk *renderer,

Просмотреть файл

@ -582,6 +582,7 @@ CommandBufferHelper::CommandBufferHelper()
mCounter(0),
mClearValues{},
mRenderPassStarted(false),
mForceIndividualBarriers(false),
mTransformFeedbackCounterBuffers{},
mValidTransformFeedbackBufferCount(0),
mRebindTransformFeedbackBuffers(false),
@ -734,7 +735,20 @@ void CommandBufferHelper::executeBarriers(ContextVk *contextVk, PrimaryCommandBu
return;
}
if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled)
if (mForceIndividualBarriers)
{
// Note: ideally we could merge double barriers into a single barrier (or even completely
// eliminate them in some cases). This is a bit trickier to manage than splitting barriers
// into single calls. It should only affect Framebuffer transitions.
// TODO: Investigate merging barriers. http://anglebug.com/4976
for (PipelineStage pipelineStage : mask)
{
PipelineBarrier &barrier = mPipelineBarriers[pipelineStage];
barrier.executeIndividually(primary);
}
mForceIndividualBarriers = false;
}
else if (contextVk->getFeatures().preferAggregateBarrierCalls.enabled)
{
PipelineStagesMask::Iterator iter = mask.begin();
PipelineBarrier &barrier = mPipelineBarriers[*iter];
@ -770,14 +784,45 @@ void CommandBufferHelper::beginRenderPass(const Framebuffer &framebuffer,
mAttachmentOps = renderPassAttachmentOps;
mDepthStencilAttachmentIndex = depthStencilAttachmentIndex;
mFramebuffer.setHandle(framebuffer.getHandle());
mRenderArea = renderArea;
mClearValues = clearValues;
*commandBufferOut = &mCommandBuffer;
mRenderArea = renderArea;
mClearValues = clearValues;
*commandBufferOut = &mCommandBuffer;
mForceIndividualBarriers = false;
if (mDepthStencilAttachmentIndex != vk::kInvalidAttachmentIndex)
{
if (renderPassAttachmentOps[mDepthStencilAttachmentIndex].loadOp ==
VK_ATTACHMENT_LOAD_OP_CLEAR)
{
mDepthStartAccess = ResourceAccess::Write;
}
if (renderPassAttachmentOps[mDepthStencilAttachmentIndex].stencilLoadOp ==
VK_ATTACHMENT_LOAD_OP_CLEAR)
{
mStencilStartAccess = ResourceAccess::Write;
}
}
mRenderPassStarted = true;
mCounter++;
}
void CommandBufferHelper::restartRenderPassWithReadOnlyDepth(const Framebuffer &framebuffer,
const RenderPassDesc &renderPassDesc)
{
ASSERT(mIsRenderPassCommandBuffer);
ASSERT(mRenderPassStarted);
mRenderPassDesc = renderPassDesc;
mAttachmentOps.setLayouts(mDepthStencilAttachmentIndex, ImageLayout::DepthStencilReadOnly,
ImageLayout::DepthStencilReadOnly);
mFramebuffer.setHandle(framebuffer.getHandle());
// Barrier aggregation messes up with RenderPass restarting.
mForceIndividualBarriers = true;
}
void CommandBufferHelper::endRenderPass()
{
pauseTransformFeedbackIfStarted();

Просмотреть файл

@ -636,6 +636,33 @@ class PipelineBarrier : angle::NonCopyable
reset();
}
// Records the pending barriers into |primary| using one vkCmdPipelineBarrier call per image
// memory barrier instead of a single batched call, then resets this object.
//
// The global memory barrier (if any) accompanies every per-image call, as before. If a global
// memory barrier is pending but no image barrier is, it is recorded in a call of its own so
// that reset() does not silently discard it.
// NOTE(review): this relies on isEmpty() returning false when only a memory barrier is
// pending - confirm against its definition.
void executeIndividually(PrimaryCommandBuffer *primary)
{
    if (isEmpty())
    {
        return;
    }

    // Describe the global memory barrier, if one has been accumulated.
    VkMemoryBarrier memoryBarrier = {};
    uint32_t memoryBarrierCount   = 0;
    if (mMemoryBarrierSrcAccess != 0)
    {
        memoryBarrier.sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
        memoryBarrier.srcAccessMask = mMemoryBarrierSrcAccess;
        memoryBarrier.dstAccessMask = mMemoryBarrierDstAccess;
        memoryBarrierCount          = 1;
    }

    // Issue one vkCmdPipelineBarrier call per image barrier.
    bool recordedAnyCall = false;
    for (const VkImageMemoryBarrier &imageBarrier : mImageMemoryBarriers)
    {
        primary->pipelineBarrier(mSrcStageMask, mDstStageMask, 0, memoryBarrierCount,
                                 &memoryBarrier, 0, nullptr, 1, &imageBarrier);
        recordedAnyCall = true;
    }

    // No image barrier carried the memory barrier: record it on its own rather than losing it.
    if (!recordedAnyCall && memoryBarrierCount != 0)
    {
        primary->pipelineBarrier(mSrcStageMask, mDstStageMask, 0, memoryBarrierCount,
                                 &memoryBarrier, 0, nullptr, 0, nullptr);
    }

    reset();
}
// merge two barriers into one
void merge(PipelineBarrier *other)
{
@ -915,6 +942,9 @@ class CommandBufferHelper : angle::NonCopyable
void endRenderPass();
void restartRenderPassWithReadOnlyDepth(const Framebuffer &framebuffer,
const RenderPassDesc &renderPassDesc);
void beginTransformFeedback(size_t validBufferCount,
const VkBuffer *counterBuffers,
bool rebindBuffers);
@ -992,6 +1022,7 @@ class CommandBufferHelper : angle::NonCopyable
void updateRenderPassForResolve(vk::Framebuffer *newFramebuffer,
const vk::RenderPassDesc &renderPassDesc);
// Returns the depth access tracked in mDepthStartAccess. beginRenderPass() sets it to
// ResourceAccess::Write when the depth attachment's loadOp is VK_ATTACHMENT_LOAD_OP_CLEAR.
ResourceAccess getDepthStartAccess() const { return mDepthStartAccess; }
private:
void addCommandDiagnostics(ContextVk *contextVk);
@ -1012,6 +1043,7 @@ class CommandBufferHelper : angle::NonCopyable
gl::Rectangle mRenderArea;
ClearValuesArray mClearValues;
bool mRenderPassStarted;
bool mForceIndividualBarriers;
// Transform feedback state
gl::TransformFeedbackBuffersArray<VkBuffer> mTransformFeedbackCounterBuffers;

Просмотреть файл

@ -1722,6 +1722,94 @@ void main()
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
// Tests corner cases with read-only depth-stencil feedback loops.
// The same depth texture is attached to two framebuffers and is also sampled by the fragment
// shader. The test alternates between drawing with and without the texture bound for sampling.
// It only checks that no GL error is generated; no pixel values are read back.
TEST_P(FramebufferTest_ES3, ReadOnlyDepthFeedbackLoopStateChanges)
{
// Feedback loops not supported on D3D11 and may not ever be.
ANGLE_SKIP_TEST_IF(IsD3D11());
// Also this particular test doesn't work on Android despite similar support in Manhattan.
ANGLE_SKIP_TEST_IF(IsAndroid() && IsOpenGLES());
constexpr GLuint kSize = 2;
glViewport(0, 0, kSize, kSize);
// Fragment shader: outputs green when the sampled depth value is within 0.1 of 0.5 (the
// clear value used below), red otherwise.
constexpr char kFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D depth;
void main()
{
if (abs(texture2D(depth, v_texCoord).x - 0.5) < 0.1)
{
gl_FragColor = vec4(0, 1, 0, 1);
}
else
{
gl_FragColor = vec4(1, 0, 0, 1);
}
})";
ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Texture2D(), kFS);
glUseProgram(program);
// Quad geometry is sourced from vertex attribute 0 for all draws below.
setupQuadVertexBuffer(0.5f, 1.0f);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
// First framebuffer: color attachment plus the shared depth texture.
GLFramebuffer framebuffer1;
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer1);
GLTexture colorTexture;
glBindTexture(GL_TEXTURE_2D, colorTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, kSize, kSize, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colorTexture, 0);
GLTexture depthTexture;
glBindTexture(GL_TEXTURE_2D, depthTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, kSize, kSize, 0, GL_DEPTH_COMPONENT,
GL_UNSIGNED_INT, nullptr);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTexture, 0);
ASSERT_GL_FRAMEBUFFER_COMPLETE(GL_FRAMEBUFFER);
// Second framebuffer: only the shared depth texture is attached (no color attachment).
GLFramebuffer framebuffer2;
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTexture, 0);
ASSERT_GL_FRAMEBUFFER_COMPLETE(GL_FRAMEBUFFER);
ASSERT_GL_NO_ERROR();
// Clear depth to 0.5.
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer1);
glClearDepthf(0.5f);
glClear(GL_DEPTH_BUFFER_BIT);
glFlush();
// Disable depth. Although this does not remove the feedback loop as defined by the
// spec it mimics what gfxbench does in its rendering tests.
glDepthMask(false);
glDisable(GL_DEPTH_TEST);
// Draw with loop.
// depthTexture is still bound to GL_TEXTURE_2D here and is framebuffer1's depth attachment.
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Draw with no loop and second FBO. Starts RP in writable mode.
glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);
glBindTexture(GL_TEXTURE_2D, 0);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Draw with loop, restarts RP.
// Re-binding depthTexture for sampling while framebuffer2 is still bound recreates the loop.
glBindTexture(GL_TEXTURE_2D, depthTexture);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
}
// Covers a bug in ANGLE's Vulkan back-end. Our VkFramebuffer cache would in some cases forget to
// check the draw states when computing a cache key.
TEST_P(FramebufferTest_ES3, DisabledAttachmentRedefinition)

Просмотреть файл

@ -361,8 +361,7 @@ TEST_P(VulkanPerformanceCounterTest, ReadOnlyDepthStencilFeedbackLoopUsesSingleR
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// TODO(jmadill): Remove extra RenderPass. http://anglebug.com/4959
uint32_t expectedRenderPassCount = counters.renderPasses + 2;
uint32_t expectedRenderPassCount = counters.renderPasses + 1;
// Start new RenderPass with depth write disabled and no loop.
glBindFramebuffer(GL_FRAMEBUFFER, depthAndColorFBO);