Vulkan: Use global buffer barriers.

This switches from using resource barriers for buffers to using global
barriers. This matches the generally advised best practice. It also
allows us to combine multiple barriers into one. On a draw we might
combine all the vertex and index barriers into a single barrier call.

We implement this using a bit of extra state tracking in BufferHelper.

Bug: angleproject:2828
Change-Id: I196b368804ff50e60d085687a643e5566ba1c5b6
Reviewed-on: https://chromium-review.googlesource.com/c/1309977
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
This commit is contained in:
Jamie Madill 2018-11-12 11:34:24 -05:00 коммит произвёл Commit Bot
Родитель 48f63f90f6
Коммит 03d1a5ed6c
8 изменённых файлов: 159 добавлений и 99 удалений

Просмотреть файл

@ -213,43 +213,8 @@ angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
stagingBuffer.getDeviceMemory().unmap(device);
// Enqueue a copy command on the GPU.
// 'beginWriteResource' will stop any subsequent rendering from using the old buffer data,
// by marking any current read operations / command buffers as 'finished'.
vk::CommandBuffer *commandBuffer = nullptr;
ANGLE_TRY(mBuffer.recordCommands(contextVk, &commandBuffer));
// Insert a barrier to ensure reads from the buffer are complete.
// TODO(jmadill): Insert minimal barriers.
VkBufferMemoryBarrier bufferBarrier = {};
bufferBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
bufferBarrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
bufferBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufferBarrier.srcQueueFamilyIndex = 0;
bufferBarrier.dstQueueFamilyIndex = 0;
bufferBarrier.buffer = mBuffer.getBuffer().getHandle();
bufferBarrier.offset = offset;
bufferBarrier.size = static_cast<VkDeviceSize>(size);
commandBuffer->pipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
&bufferBarrier, 0, nullptr);
VkBufferCopy copyRegion = {0, offset, size};
commandBuffer->copyBuffer(stagingBuffer.getBuffer(), mBuffer.getBuffer(), 1, &copyRegion);
// Insert a barrier to ensure copy has done.
// TODO(jie.a.chen@intel.com): Insert minimal barriers.
bufferBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufferBarrier.dstAccessMask =
VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_READ_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;
commandBuffer->pipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
&bufferBarrier, 0, nullptr);
ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, stagingBuffer.getBuffer(), copyRegion));
// Immediately release staging buffer. We should probably be using a DynamicBuffer here.
renderer->releaseObject(renderer->getCurrentQueueSerial(), &stagingBuffer);

Просмотреть файл

@ -297,7 +297,9 @@ CommandGraphNode::CommandGraphNode(CommandGraphNodeFunction function)
mQueryPool(VK_NULL_HANDLE),
mQueryIndex(0),
mHasChildren(false),
mVisitedState(VisitedState::Unvisited)
mVisitedState(VisitedState::Unvisited),
mGlobalMemoryBarrierSrcAccess(0),
mGlobalMemoryBarrierDstAccess(0)
{
}
@ -429,6 +431,12 @@ void CommandGraphNode::setQueryPool(const QueryPool *queryPool, uint32_t queryIn
mQueryIndex = queryIndex;
}
// Merges the given access masks into this node's pending global memory barrier. Masks are
// OR-ed together, so several requests collapse into the single barrier that visitAndExecute
// records for the node.
void CommandGraphNode::addGlobalMemoryBarrier(VkFlags srcAccess, VkFlags dstAccess)
{
    mGlobalMemoryBarrierDstAccess = mGlobalMemoryBarrierDstAccess | dstAccess;
    mGlobalMemoryBarrierSrcAccess = mGlobalMemoryBarrierSrcAccess | srcAccess;
}
void CommandGraphNode::setHasChildren()
{
mHasChildren = true;
@ -480,6 +488,21 @@ angle::Result CommandGraphNode::visitAndExecute(vk::Context *context,
case CommandGraphNodeFunction::Generic:
ASSERT(mQueryPool == VK_NULL_HANDLE);
// Record the deferred pipeline barrier if necessary.
// Source and destination masks are only ever accumulated together (addGlobalMemoryBarrier), so
// one being zero implies the other is zero as well.
ASSERT((mGlobalMemoryBarrierDstAccess == 0) == (mGlobalMemoryBarrierSrcAccess == 0));
if (mGlobalMemoryBarrierSrcAccess)
{
    VkMemoryBarrier memoryBarrier = {};
    memoryBarrier.sType           = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    memoryBarrier.srcAccessMask   = mGlobalMemoryBarrierSrcAccess;
    memoryBarrier.dstAccessMask   = mGlobalMemoryBarrierDstAccess;

    // Use the all-commands stage on both sides to keep the state management simple.
    // TOP_OF_PIPE cannot be used here: it supports no access types, so pairing it with
    // non-zero access masks is invalid usage and would not order the barrier against any
    // earlier or later work.
    primaryCommandBuffer->pipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                          VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1,
                                          &memoryBarrier, 0, nullptr, 0, nullptr);
}
if (mOutsideRenderPassCommands.valid())
{
ANGLE_TRY(mOutsideRenderPassCommands.end(context));

Просмотреть файл

@ -107,6 +107,8 @@ class CommandGraphNode final : angle::NonCopyable
void setQueryPool(const QueryPool *queryPool, uint32_t queryIndex);
void addGlobalMemoryBarrier(VkFlags srcAccess, VkFlags dstAccess);
private:
void setHasChildren();
@ -142,6 +144,10 @@ class CommandGraphNode final : angle::NonCopyable
// Additional diagnostic information.
CommandGraphResourceType mResourceType;
uintptr_t mResourceID;
// For global memory barriers.
VkFlags mGlobalMemoryBarrierSrcAccess;
VkFlags mGlobalMemoryBarrierDstAccess;
};
// This is a helper class for back-end objects used in Vk command buffers. It records a serial
@ -214,11 +220,17 @@ class RecordableGraphResource : public CommandGraphResource
// Called when 'this' object changes, but we'd like to start a new command buffer later.
void finishCurrentCommands(RendererVk *renderer);
// Store a deferred memory barrier. Will be recorded into a primary command buffer at submit.
// The access masks are forwarded to the current writing node, which accumulates them; a
// current writing node must therefore exist when this is called (checked by the ASSERT).
void addGlobalMemoryBarrier(VkFlags srcAccess, VkFlags dstAccess)
{
ASSERT(mCurrentWritingNode);
mCurrentWritingNode->addGlobalMemoryBarrier(srcAccess, dstAccess);
}
protected:
explicit RecordableGraphResource(CommandGraphResourceType resourceType);
private:
// Returns true if this node has a current writing node with no children.
bool hasChildlessWritingNode() const
{

Просмотреть файл

@ -428,14 +428,19 @@ angle::Result ContextVk::handleDirtyVertexBuffers(const gl::Context *context,
mProgram->getState().getMaxActiveAttribLocation(),
mVertexArray->getCurrentArrayBufferHandles(), mVertexArray->getCurrentArrayBufferOffsets());
const auto &arrayBufferResources = mVertexArray->getCurrentArrayBufferResources();
const auto &arrayBufferResources = mVertexArray->getCurrentArrayBuffers();
vk::FramebufferHelper *framebuffer = mDrawFramebuffer->getFramebuffer();
for (size_t attribIndex : context->getStateCache().getActiveBufferedAttribsMask())
{
if (arrayBufferResources[attribIndex])
arrayBufferResources[attribIndex]->addReadDependency(
mDrawFramebuffer->getFramebuffer());
vk::BufferHelper *arrayBuffer = arrayBufferResources[attribIndex];
if (arrayBuffer)
{
arrayBuffer->onFramebufferRead(framebuffer, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
}
}
return angle::Result::Continue();
}
@ -446,11 +451,11 @@ angle::Result ContextVk::handleDirtyIndexBuffer(const gl::Context *context,
mVertexArray->getCurrentElementArrayBufferOffset(),
gl_vk::GetIndexType(mCurrentDrawElementsType));
vk::RecordableGraphResource *elementArrayBufferResource =
mVertexArray->getCurrentElementArrayBufferResource();
if (elementArrayBufferResource)
vk::BufferHelper *elementArrayBuffer = mVertexArray->getCurrentElementArrayBuffer();
if (elementArrayBuffer)
{
elementArrayBufferResource->addReadDependency(mDrawFramebuffer->getFramebuffer());
vk::FramebufferHelper *framebuffer = mDrawFramebuffer->getFramebuffer();
elementArrayBuffer->onFramebufferRead(framebuffer, VK_ACCESS_INDEX_READ_BIT);
}
return angle::Result::Continue();
}

Просмотреть файл

@ -64,7 +64,7 @@ VertexArrayVk::VertexArrayVk(const gl::VertexArrayState &state, RendererVk *rend
: VertexArrayImpl(state),
mCurrentArrayBufferHandles{},
mCurrentArrayBufferOffsets{},
mCurrentArrayBufferResources{},
mCurrentArrayBuffers{},
mCurrentArrayBufferFormats{},
mCurrentArrayBufferStrides{},
mCurrentArrayBufferConversion{{
@ -74,7 +74,7 @@ VertexArrayVk::VertexArrayVk(const gl::VertexArrayState &state, RendererVk *rend
mCurrentArrayBufferConversionCanRelease{},
mCurrentElementArrayBufferHandle(VK_NULL_HANDLE),
mCurrentElementArrayBufferOffset(0),
mCurrentElementArrayBufferResource(nullptr),
mCurrentElementArrayBuffer(nullptr),
mPackedInputBindings{},
mPackedInputAttributes{},
mDynamicVertexData(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, kDynamicVertexDataSize),
@ -85,7 +85,7 @@ VertexArrayVk::VertexArrayVk(const gl::VertexArrayState &state, RendererVk *rend
{
mCurrentArrayBufferHandles.fill(VK_NULL_HANDLE);
mCurrentArrayBufferOffsets.fill(0);
mCurrentArrayBufferResources.fill(nullptr);
mCurrentArrayBuffers.fill(nullptr);
for (vk::DynamicBuffer &buffer : mCurrentArrayBufferConversion)
{
@ -254,13 +254,13 @@ angle::Result VertexArrayVk::syncState(const gl::Context *context,
if (bufferGL)
{
BufferVk *bufferVk = vk::GetImpl(bufferGL);
mCurrentElementArrayBufferResource = &bufferVk->getBuffer();
mCurrentElementArrayBuffer = &bufferVk->getBuffer();
mCurrentElementArrayBufferHandle =
bufferVk->getBuffer().getBuffer().getHandle();
}
else
{
mCurrentElementArrayBufferResource = nullptr;
mCurrentElementArrayBuffer = nullptr;
mCurrentElementArrayBufferHandle = VK_NULL_HANDLE;
}
@ -326,12 +326,12 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
ANGLE_TRY(convertVertexBuffer(contextVk, bufferVk, binding, attribIndex));
mCurrentArrayBufferResources[attribIndex] = nullptr;
mCurrentArrayBuffers[attribIndex] = nullptr;
releaseConversion = false;
}
else
{
mCurrentArrayBufferResources[attribIndex] = &bufferVk->getBuffer();
mCurrentArrayBuffers[attribIndex] = &bufferVk->getBuffer();
mCurrentArrayBufferHandles[attribIndex] =
bufferVk->getBuffer().getBuffer().getHandle();
mCurrentArrayBufferOffsets[attribIndex] = binding.getOffset();
@ -340,7 +340,7 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
}
else
{
mCurrentArrayBufferResources[attribIndex] = nullptr;
mCurrentArrayBuffers[attribIndex] = nullptr;
mCurrentArrayBufferHandles[attribIndex] = VK_NULL_HANDLE;
mCurrentArrayBufferOffsets[attribIndex] = 0;
mCurrentArrayBufferStrides[attribIndex] =
@ -352,7 +352,7 @@ angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
contextVk->invalidateDefaultAttribute(attribIndex);
// These will be filled out by the ContextVk.
mCurrentArrayBufferResources[attribIndex] = nullptr;
mCurrentArrayBuffers[attribIndex] = nullptr;
mCurrentArrayBufferHandles[attribIndex] = VK_NULL_HANDLE;
mCurrentArrayBufferOffsets[attribIndex] = 0;
mCurrentArrayBufferStrides[attribIndex] = 0;
@ -584,7 +584,7 @@ void VertexArrayVk::updateDefaultAttrib(RendererVk *renderer,
{
mCurrentArrayBufferHandles[attribIndex] = bufferHandle;
mCurrentArrayBufferOffsets[attribIndex] = offset;
mCurrentArrayBufferResources[attribIndex] = nullptr;
mCurrentArrayBuffers[attribIndex] = nullptr;
mCurrentArrayBufferStrides[attribIndex] = 0;
mCurrentArrayBufferFormats[attribIndex] =
&renderer->getFormat(angle::FormatID::R32G32B32A32_FIXED);

Просмотреть файл

@ -65,9 +65,9 @@ class VertexArrayVk : public VertexArrayImpl
return mCurrentArrayBufferOffsets;
}
const gl::AttribArray<vk::RecordableGraphResource *> &getCurrentArrayBufferResources() const
const gl::AttribArray<vk::BufferHelper *> &getCurrentArrayBuffers() const
{
return mCurrentArrayBufferResources;
return mCurrentArrayBuffers;
}
// Cached Vulkan handle of the current element array buffer (VK_NULL_HANDLE when none is set).
VkBuffer getCurrentElementArrayBufferHandle() const
{
    return mCurrentElementArrayBufferHandle;
}
@ -82,10 +82,7 @@ class VertexArrayVk : public VertexArrayImpl
mCurrentElementArrayBufferOffset = reinterpret_cast<VkDeviceSize>(offset);
}
vk::RecordableGraphResource *getCurrentElementArrayBufferResource() const
{
return mCurrentElementArrayBufferResource;
}
// Helper for the current element array buffer; nullptr when syncState found no buffer bound.
vk::BufferHelper *getCurrentElementArrayBuffer() const
{
    return mCurrentElementArrayBuffer;
}
angle::Result updateIndexTranslation(ContextVk *contextVk,
GLsizei indexCount,
@ -121,14 +118,14 @@ class VertexArrayVk : public VertexArrayImpl
gl::AttribArray<VkBuffer> mCurrentArrayBufferHandles;
gl::AttribArray<VkDeviceSize> mCurrentArrayBufferOffsets;
gl::AttribArray<vk::RecordableGraphResource *> mCurrentArrayBufferResources;
gl::AttribArray<vk::BufferHelper *> mCurrentArrayBuffers;
gl::AttribArray<const vk::Format *> mCurrentArrayBufferFormats;
gl::AttribArray<GLuint> mCurrentArrayBufferStrides;
gl::AttribArray<vk::DynamicBuffer> mCurrentArrayBufferConversion;
gl::AttribArray<bool> mCurrentArrayBufferConversionCanRelease;
VkBuffer mCurrentElementArrayBufferHandle;
VkDeviceSize mCurrentElementArrayBufferOffset;
vk::RecordableGraphResource *mCurrentElementArrayBufferResource;
vk::BufferHelper *mCurrentElementArrayBuffer;
// Keep a cache of binding and attribute descriptions for easy pipeline updates.
// This is copied out of here into the pipeline description on a Context state change.

Просмотреть файл

@ -887,7 +887,10 @@ void LineLoopHelper::Draw(uint32_t count, CommandBuffer *commandBuffer)
// BufferHelper implementation.
BufferHelper::BufferHelper()
: RecordableGraphResource(CommandGraphResourceType::Buffer), mMemoryPropertyFlags{}
: RecordableGraphResource(CommandGraphResourceType::Buffer),
mMemoryPropertyFlags{},
mCurrentWriteAccess(0),
mCurrentReadAccess(0)
{
}
@ -908,6 +911,47 @@ void BufferHelper::release(RendererVk *renderer)
renderer->releaseObject(getStoredQueueSerial(), &mDeviceMemory);
}
// Registers |framebuffer| as a reader of this buffer and, when a write to the buffer is being
// tracked, asks the framebuffer for a global memory barrier from that write to |accessType|.
// Each read access type is only requested once per tracked write.
void BufferHelper::onFramebufferRead(FramebufferHelper *framebuffer, VkAccessFlagBits accessType)
{
    addReadDependency(framebuffer);

    // No write is being tracked: there is nothing to make visible to the reader.
    if (mCurrentWriteAccess == 0)
    {
        return;
    }

    // A barrier for this kind of read has already been requested since the last write.
    const bool alreadyRequested = (mCurrentReadAccess & accessType) != 0;
    if (alreadyRequested)
    {
        return;
    }

    framebuffer->addGlobalMemoryBarrier(mCurrentWriteAccess, accessType);
    mCurrentReadAccess |= accessType;
}
// Records a GPU copy of |copyRegion| from |buffer| into this helper's buffer. When earlier
// accesses to this buffer are being tracked, a global memory barrier is recorded first. The
// tracked state is then reset to "written by transfer, no reads yet".
//
// contextVk:  passed to recordCommands() to obtain the command buffer to record into.
// buffer:     source buffer of the copy.
// copyRegion: region description handed unchanged to copyBuffer().
//
// Returns angle::Result::Continue() once the copy is recorded. A failing recordCommands() is
// handled by ANGLE_TRY (presumably an early return of the error - confirm against the macro).
angle::Result BufferHelper::copyFromBuffer(ContextVk *contextVk,
                                           const Buffer &buffer,
                                           const VkBufferCopy &copyRegion)
{
    // 'recordCommands' will implicitly stop any reads from using the old buffer data.
    vk::CommandBuffer *commandBuffer = nullptr;
    ANGLE_TRY(recordCommands(contextVk, &commandBuffer));

    // Both earlier reads and earlier writes must be ordered before the new transfer write.
    const VkFlags pendingAccess = mCurrentReadAccess | mCurrentWriteAccess;
    if (pendingAccess != 0)
    {
        // Insert a barrier to ensure reads/writes are complete.
        // Use a global memory barrier to keep things simple.
        VkMemoryBarrier memoryBarrier = {};
        memoryBarrier.sType           = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
        memoryBarrier.srcAccessMask   = pendingAccess;
        memoryBarrier.dstAccessMask   = VK_ACCESS_TRANSFER_WRITE_BIT;

        // The source stage must cover the stages that performed the pending accesses.
        // TOP_OF_PIPE supports no access types and would not wait for any earlier work, so
        // use ALL_COMMANDS instead of tracking the exact stages.
        commandBuffer->pipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                       VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, &memoryBarrier, 0,
                                       nullptr, 0, nullptr);
    }

    // Always record the transfer write, including for the very first copy. onFramebufferRead
    // only requests a barrier when a write is being tracked, so skipping this update on the
    // first copy would leave later reads unsynchronized.
    mCurrentWriteAccess = VK_ACCESS_TRANSFER_WRITE_BIT;
    mCurrentReadAccess  = 0;

    commandBuffer->copyBuffer(buffer, mBuffer, 1, &copyRegion);
    return angle::Result::Continue();
}
// ImageHelper implementation.
ImageHelper::ImageHelper()
: RecordableGraphResource(CommandGraphResourceType::Image),

Просмотреть файл

@ -369,6 +369,8 @@ class LineLoopHelper final : angle::NonCopyable
DynamicBuffer mDynamicIndexBuffer;
};
class FramebufferHelper;
class BufferHelper final : public RecordableGraphResource
{
public:
@ -384,6 +386,14 @@ class BufferHelper final : public RecordableGraphResource
const Buffer &getBuffer() const { return mBuffer; }
const DeviceMemory &getDeviceMemory() const { return mDeviceMemory; }
// Helper for setting the graph dependencies *and* setting the appropriate barrier.
void onFramebufferRead(FramebufferHelper *framebuffer, VkAccessFlagBits accessType);
// Also implicitly sets up the correct barriers.
angle::Result copyFromBuffer(ContextVk *contextVk,
const Buffer &buffer,
const VkBufferCopy &copyRegion);
private:
// Vulkan objects.
Buffer mBuffer;
@ -391,6 +401,10 @@ class BufferHelper final : public RecordableGraphResource
// Cached properties.
VkMemoryPropertyFlags mMemoryPropertyFlags;
// For memory barriers.
VkFlags mCurrentWriteAccess;
VkFlags mCurrentReadAccess;
};
class ImageHelper final : public RecordableGraphResource