Metal: Optimized BufferSubData per device

Adds a staging buffer path which means there are 4 paths
for bufferSubData.

1. direct copy

   * get a pointer to the buffer
   * copy the new data to the buffer
   * if the buffer is managed, tell metal which part was updated

2. use a shadow copy

   * copy the data to a shadow copy
   * copy the entire shadow to a new buffer
   * start using the new buffer

3. use a new buffer

   * get a new buffer (or unused)
   * put the new data in the new buffer
   * blit any unchanged data from the old buffer to the new buffer
   * start using the new buffer

4. use a staging buffer

   * get a staging buffer
   * put the new data in the staging buffer
   * blit from the staging buffer to the existing buffer.

Further, there are 3 types of memory storage modes:
Managed, Shared, Private.

Based on the GPU type, different storage modes and different
paths in different situations are more performant.

So, add feature flags to select paths by GPU.

Bug: angleproject:7544
Change-Id: I741dd1874201043416374194bd2001ded8dbd9b4
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3842641
Reviewed-by: Kyle Piddington <kpiddington@apple.com>
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Reviewed-by: Quyen Le <lehoangquyen@chromium.org>
Commit-Queue: Gregg Tavares <gman@chromium.org>
This commit is contained in:
Gregg Tavares 2022-08-19 12:11:23 -07:00 коммит произвёл Angle LUCI CQ
Родитель 0e0ea0265b
Коммит 968041b547
24 изменённых файлов: 900 добавлений и 162 удалений

Просмотреть файл

@ -232,6 +232,31 @@ struct FeaturesMtl : FeatureSetBase
"uploadDataToIosurfacesWithStagingBuffers", FeatureCategory::MetalWorkarounds,
"When uploading data to IOSurface-backed textures, use a staging buffer.", &members,
"http://anglebug.com/7573"};
FeatureInfo alwaysUseStagedBufferUpdates = {
"alwaysUseStagedBufferUpdates", FeatureCategory::MetalFeatures,
"Always update buffers by copying the data to a staging buffer and then blitting it to the "
"actual buffer",
&members, "http://anglebug.com/7544"};
FeatureInfo useShadowBuffersWhenAppropriate = {
"useShadowBuffersWhenAppropriate", FeatureCategory::MetalFeatures,
"On some architectures using a shadow buffer can be faster for certain size buffers",
&members, "http://anglebug.com/7544"};
FeatureInfo alwaysUseManagedStorageModeForBuffers = {
"alwaysUseManagedStorageModeForBuffers", FeatureCategory::MetalFeatures,
"Metal buffers can be managed, shared, or private. Sometimes managed is fastest", &members,
"http://anglebug.com/7544"};
FeatureInfo alwaysUseSharedStorageModeForBuffers = {
"alwaysUseSharedStorageModeForBuffers", FeatureCategory::MetalFeatures,
"Metal buffers can be managed, shared, or private. Sometimes shared is fastest", &members,
"http://anglebug.com/7544"};
FeatureInfo preferCpuForBuffersubdata = {
"preferCpuForBuffersubdata", FeatureCategory::MetalFeatures,
"Makes bufferSubData always update via CPU", &members, "http://anglebug.com/7544"};
};
inline FeaturesMtl::FeaturesMtl() = default;

Просмотреть файл

@ -243,6 +243,46 @@
"When uploading data to IOSurface-backed textures, use a staging buffer."
],
"issue": "http://anglebug.com/7573"
},
{
"name": "always_use_staged_buffer_updates",
"category": "Features",
"description": [
"Always update buffers by copying the data to a staging buffer and then blitting it to the actual buffer"
],
"issue": "http://anglebug.com/7544"
},
{
"name": "use_shadow_buffers_when_appropriate",
"category": "Features",
"description": [
"On some architectures using a shadow buffer can be faster for certain size buffers"
],
"issue": "http://anglebug.com/7544"
},
{
"name": "always_use_managed_storage_mode_for_buffers",
"category": "Features",
"description": [
"Metal buffers can be managed, shared, or private. Sometimes managed is fastest"
],
"issue": "http://anglebug.com/7544"
},
{
"name": "always_use_shared_storage_mode_for_buffers",
"category": "Features",
"description": [
"Metal buffers can be managed, shared, or private. Sometimes shared is fastest"
],
"issue": "http://anglebug.com/7544"
},
{
"name": "prefer_cpu_for_buffersubdata",
"category": "Features",
"description": [
"Makes bufferSubData always update via CPU"
],
"issue": "http://anglebug.com/7544"
}
]
}

Просмотреть файл

@ -4,7 +4,7 @@
"include/platform/FeaturesGL_autogen.h":
"c192145f3939d4d0bf85a39649e0c14e",
"include/platform/FeaturesMtl_autogen.h":
"80c0f3379882d1f67e523a3a1530cd79",
"c31c1c77040ef119dfaf882b5b5e65ab",
"include/platform/FeaturesVk_autogen.h":
"03f5b51f08b6cb4f831764aa4848f399",
"include/platform/FrontendFeatures_autogen.h":
@ -18,11 +18,11 @@
"include/platform/gl_features.json":
"a50e9bd2fa9eb0685d9b1c118a21ad2c",
"include/platform/mtl_features.json":
"9833c17145ba2223da2e607a9340afda",
"408385ed8fa29652e23a6338faec6d2f",
"include/platform/vk_features.json":
"a0dd571e23e0bd521eb42d72a0863297",
"util/angle_features_autogen.cpp":
"b6a2d2cac7d30b6c08d9398fed38a14c",
"d43086098956bfd4374284a05cfb884d",
"util/angle_features_autogen.h":
"1e4b7c6e89ee370d052fa7f0c48c11c6"
"80421f1223abdee293434a2c7f8ff3bc"
}

Просмотреть файл

@ -51,6 +51,8 @@ _metal_backend_sources = [
"TransformFeedbackMtl.mm",
"VertexArrayMtl.h",
"VertexArrayMtl.mm",
"mtl_buffer_manager.h",
"mtl_buffer_manager.mm",
"mtl_buffer_pool.h",
"mtl_buffer_pool.mm",
"mtl_command_buffer.h",

Просмотреть файл

@ -151,7 +151,8 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl
size_t count,
std::pair<uint32_t, uint32_t> *outIndices);
const uint8_t *getClientShadowCopyData(ContextMtl *contextMtl);
const uint8_t *getBufferDataReadOnly(ContextMtl *contextMtl);
bool isSafeToReadFromBufferViaBlit(ContextMtl *contextMtl);
ConversionBufferMtl *getVertexConversionBuffer(ContextMtl *context,
angle::FormatID formatID,
@ -186,27 +187,36 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl
size_t size,
size_t offset);
angle::Result commitShadowCopy(const gl::Context *context);
angle::Result commitShadowCopy(const gl::Context *context, size_t size);
angle::Result commitShadowCopy(ContextMtl *contextMtl);
angle::Result commitShadowCopy(ContextMtl *contextMtl, size_t size);
void markConversionBuffersDirty();
void clearConversionBuffers();
angle::Result putDataInNewBufferAndStartUsingNewBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result updateExistingBufferViaBlitFromStagingBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result copyDataToExistingBufferViaCPU(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
angle::Result updateShadowCopyThenCopyShadowToNewBuffer(ContextMtl *contextMtl,
const uint8_t *srcPtr,
size_t sizeToCopy,
size_t offset);
bool clientShadowCopyDataNeedSync(ContextMtl *contextMtl);
void ensureShadowCopySyncedFromGPU(ContextMtl *contextMtl);
uint8_t *syncAndObtainShadowCopy(ContextMtl *contextMtl);
// Convenient method
const uint8_t *getClientShadowCopyData(const gl::Context *context)
{
return getClientShadowCopyData(mtl::GetImpl(context));
}
// Client side shadow buffer
// Optional client side shadow buffer
angle::MemoryBuffer mShadowCopy;
// GPU side buffers pool
mtl::BufferPool mBufferPool;
// A cache of converted vertex data.
std::vector<VertexConversionBufferMtl> mVertexConversionBuffers;
@ -224,6 +234,9 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl
};
std::optional<RestartRangeCache> mRestartRangeCache;
std::vector<IndexRange> mRestartIndices;
size_t mGLSize = 0; // size GL asked for (vs size we actually allocated)
size_t mRevisionCount = 0; // for generating labels only
gl::BufferUsage mUsage;
};
class SimpleWeakBufferHolderMtl : public BufferHolderMtl

Просмотреть файл

@ -13,6 +13,7 @@
#include "common/utilities.h"
#include "libANGLE/renderer/metal/ContextMtl.h"
#include "libANGLE/renderer/metal/DisplayMtl.h"
#include "libANGLE/renderer/metal/mtl_buffer_manager.h"
namespace rx
{
@ -39,6 +40,12 @@ angle::Result GetFirstLastIndices(const IndexType *indices,
return angle::Result::Continue;
}
bool isOffsetAndSizeMetalBlitCompatible(size_t offset, size_t size)
{
    // Metal blit copies require both the byte offset and the byte size to be
    // 4-byte aligned; check both alignments with a single mask test.
    constexpr size_t kBlitAlignmentMask = 3;
    return ((offset | size) & kBlitAlignmentMask) == 0;
}
} // namespace
// ConversionBufferMtl implementation.
@ -88,9 +95,7 @@ VertexConversionBufferMtl::VertexConversionBufferMtl(ContextMtl *context,
{}
// BufferMtl implementation
BufferMtl::BufferMtl(const gl::BufferState &state)
: BufferImpl(state), mBufferPool(/** alwaysAllocNewBuffer */ true)
{}
BufferMtl::BufferMtl(const gl::BufferState &state) : BufferImpl(state) {}
BufferMtl::~BufferMtl() {}
@ -98,8 +103,13 @@ void BufferMtl::destroy(const gl::Context *context)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
mShadowCopy.clear();
mBufferPool.destroy(contextMtl);
mBuffer = nullptr;
// if there's a buffer, give it back to the buffer manager
if (mBuffer)
{
contextMtl->getBufferManager().returnBuffer(contextMtl, mBuffer);
mBuffer = nullptr;
}
clearConversionBuffers();
}
@ -136,19 +146,30 @@ angle::Result BufferMtl::copySubData(const gl::Context *context,
ContextMtl *contextMtl = mtl::GetImpl(context);
auto srcMtl = GetAs<BufferMtl>(source);
if (srcMtl->clientShadowCopyDataNeedSync(contextMtl) || mBuffer->isBeingUsedByGPU(contextMtl))
{
// If shadow copy requires a synchronization then use blit command instead.
// It might break a pending render pass, but still faster than synchronization with
// GPU.
mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder();
blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset,
size);
markConversionBuffersDirty();
return angle::Result::Continue;
if (mShadowCopy.size() > 0)
{
if (srcMtl->clientShadowCopyDataNeedSync(contextMtl) ||
mBuffer->isBeingUsedByGPU(contextMtl))
{
// If shadow copy requires a synchronization then use blit command instead.
// It might break a pending render pass, but still faster than synchronization with
// GPU.
mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder();
blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset,
size);
return angle::Result::Continue;
}
return setSubDataImpl(context, srcMtl->getBufferDataReadOnly(contextMtl) + sourceOffset,
size, destOffset);
}
return setSubDataImpl(context, srcMtl->getClientShadowCopyData(contextMtl) + sourceOffset, size,
destOffset);
mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder();
blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset, size);
return angle::Result::Continue;
}
angle::Result BufferMtl::map(const gl::Context *context, GLenum access, void **mapPtr)
@ -176,7 +197,7 @@ angle::Result BufferMtl::mapRange(const gl::Context *context,
if (mapPtr)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
if (mBufferPool.getMaxBuffers() == 1)
if (mShadowCopy.size() == 0)
{
*mapPtr = mBuffer->mapWithOpt(contextMtl, (access & GL_MAP_WRITE_BIT) == 0,
access & GL_MAP_UNSYNCHRONIZED_BIT) +
@ -199,7 +220,7 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result)
markConversionBuffersDirty();
if (mBufferPool.getMaxBuffers() == 1)
if (mShadowCopy.size() == 0)
{
ASSERT(mBuffer);
if (mState.getAccessFlags() & GL_MAP_WRITE_BIT)
@ -215,8 +236,6 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result)
}
else
{
ASSERT(mShadowCopy.size());
if (mState.getAccessFlags() & GL_MAP_UNSYNCHRONIZED_BIT)
{
// Copy the mapped region without synchronization with GPU
@ -228,7 +247,7 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result)
else
{
// commit shadow copy data to GPU synchronously
ANGLE_TRY(commitShadowCopy(context));
ANGLE_TRY(commitShadowCopy(contextMtl));
}
}
@ -247,7 +266,7 @@ angle::Result BufferMtl::getIndexRange(const gl::Context *context,
bool primitiveRestartEnabled,
gl::IndexRange *outRange)
{
const uint8_t *indices = getClientShadowCopyData(mtl::GetImpl(context)) + offset;
const uint8_t *indices = getBufferDataReadOnly(mtl::GetImpl(context)) + offset;
*outRange = gl::ComputeIndexRange(type, indices, count, primitiveRestartEnabled);
@ -260,7 +279,7 @@ angle::Result BufferMtl::getFirstLastIndices(ContextMtl *contextMtl,
size_t count,
std::pair<uint32_t, uint32_t> *outIndices)
{
const uint8_t *indices = getClientShadowCopyData(contextMtl) + offset;
const uint8_t *indices = getBufferDataReadOnly(contextMtl) + offset;
switch (type)
{
@ -283,10 +302,9 @@ void BufferMtl::onDataChanged()
markConversionBuffersDirty();
}
/* public */
const uint8_t *BufferMtl::getClientShadowCopyData(ContextMtl *contextMtl)
const uint8_t *BufferMtl::getBufferDataReadOnly(ContextMtl *contextMtl)
{
if (mBufferPool.getMaxBuffers() == 1)
if (mShadowCopy.size() == 0)
{
// Don't need shadow copy in this case, use the buffer directly
return mBuffer->mapReadOnly(contextMtl);
@ -479,13 +497,45 @@ const std::vector<IndexRange> BufferMtl::getRestartIndicesFromClientData(
return restartIndices;
}
namespace
{
bool useSharedMemory(ContextMtl *contextMtl, gl::BufferUsage usage)
{
    // Feature-flag overrides take precedence over the usage heuristic.
    // Managed is checked first, so it wins if both flags are enabled.
    const angle::FeaturesMtl &features = contextMtl->getDisplay()->getFeatures();
    if (features.alwaysUseManagedStorageModeForBuffers.enabled)
    {
        return false;
    }
    if (features.alwaysUseSharedStorageModeForBuffers.enabled)
    {
        return true;
    }

    // Static and read-oriented buffers are rarely rewritten from the CPU, so
    // shared storage is preferred for them; everything else stays non-shared.
    const bool isStaticOrReadUsage =
        usage == gl::BufferUsage::StaticCopy || usage == gl::BufferUsage::StaticDraw ||
        usage == gl::BufferUsage::StaticRead || usage == gl::BufferUsage::DynamicRead ||
        usage == gl::BufferUsage::StreamRead;
    return isStaticOrReadUsage;
}
} // namespace
angle::Result BufferMtl::setDataImpl(const gl::Context *context,
gl::BufferBinding target,
const void *data,
size_t intendedSize,
gl::BufferUsage usage)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
ContextMtl *contextMtl = mtl::GetImpl(context);
const angle::FeaturesMtl &features = contextMtl->getDisplay()->getFeatures();
// Invalidate conversion buffers
if (mState.getSize() != static_cast<GLint64>(intendedSize))
@ -497,80 +547,32 @@ angle::Result BufferMtl::setDataImpl(const gl::Context *context,
markConversionBuffersDirty();
}
mUsage = usage;
mGLSize = intendedSize;
size_t adjustedSize = std::max<size_t>(1, intendedSize);
// Ensures no validation layer issues in std140 with data types like vec3 being 12 bytes vs 16
// in MSL.
if (target == gl::BufferBinding::Uniform)
{
// This doesn't work! A buffer can be allocated on ARRAY_BUFFER and used in UNIFORM_BUFFER
// TODO(anglebug.com/7585)
adjustedSize = roundUpPow2(adjustedSize, (size_t)16);
}
size_t maxBuffers;
switch (usage)
{
case gl::BufferUsage::StaticCopy:
case gl::BufferUsage::StaticDraw:
case gl::BufferUsage::StaticRead:
case gl::BufferUsage::DynamicRead:
case gl::BufferUsage::StreamRead:
maxBuffers = 1; // static/read buffer doesn't need high speed data update
mBufferPool.setAlwaysUseGPUMem();
break;
default:
// dynamic buffer, allow up to 10 update per frame/encoding without
// waiting for GPU.
if (adjustedSize <= mtl::kSharedMemBufferMaxBufSizeHint)
{
maxBuffers = 10;
mBufferPool.setAlwaysUseSharedMem();
}
else
{
maxBuffers = 1;
mBufferPool.setAlwaysUseGPUMem();
}
break;
}
// Re-create the buffer
mBuffer = nullptr;
ANGLE_TRY(mBufferPool.reset(contextMtl, adjustedSize, 1, maxBuffers));
if (maxBuffers > 1)
mtl::BufferManager &bufferManager = contextMtl->getBufferManager();
if (mBuffer)
{
// We use shadow copy to maintain consistent data between buffers in pool
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(adjustedSize), GL_OUT_OF_MEMORY);
if (data)
{
// Transfer data to shadow copy buffer
auto ptr = static_cast<const uint8_t *>(data);
std::copy(ptr, ptr + intendedSize, mShadowCopy.data());
// Transfer data from shadow copy buffer to GPU buffer.
ANGLE_TRY(commitShadowCopy(context, adjustedSize));
}
else
{
// This is needed so that first buffer pointer could be available
ANGLE_TRY(commitShadowCopy(context, 0));
}
// Return the current buffer to the buffer manager
// It will not be re-used until it's no longer in use.
bufferManager.returnBuffer(contextMtl, mBuffer);
mBuffer = nullptr;
}
else
{
// We don't need shadow copy if there will be only one buffer in the pool.
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(0), GL_OUT_OF_MEMORY);
// Allocate one buffer to use
ANGLE_TRY(
mBufferPool.allocate(contextMtl, adjustedSize, nullptr, &mBuffer, nullptr, nullptr));
if (data)
{
ANGLE_TRY(setSubDataImpl(context, data, intendedSize, 0));
}
}
// Get a new buffer
bool useSharedMem = useSharedMemory(contextMtl, usage);
ANGLE_TRY(bufferManager.getBuffer(contextMtl, adjustedSize, useSharedMem, mBuffer));
#ifndef NDEBUG
ANGLE_MTL_OBJC_SCOPE
@ -579,9 +581,137 @@ angle::Result BufferMtl::setDataImpl(const gl::Context *context,
}
#endif
// We may use shadow copy to maintain consistent data between buffers in pool
size_t shadowSize = (!features.preferCpuForBuffersubdata.enabled &&
features.useShadowBuffersWhenAppropriate.enabled &&
adjustedSize <= mtl::kSharedMemBufferMaxBufSizeHint)
? adjustedSize
: 0;
ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(shadowSize), GL_OUT_OF_MEMORY);
if (data)
{
ANGLE_TRY(setSubDataImpl(context, data, intendedSize, 0));
}
return angle::Result::Continue;
}
// states:
// * The buffer is not use
//
// safe = true
//
// * The buffer has a pending blit
//
// In this case, as long as we are only reading from it
// via blit to a new buffer our blits will happen after existing
// blits
//
// safe = true
//
// * The buffer has pending writes in a commited render encoder
//
// In this case we're encoding commands that will happen after
// that encoder
//
// safe = true
//
// * The buffer has pending writes in the current render encoder
//
// in this case we have to split/end the render encoder
// before we can use the buffer.
//
// safe = false
bool BufferMtl::isSafeToReadFromBufferViaBlit(ContextMtl *contextMtl)
{
    // Reading this buffer with a blit is only unsafe when the still-open
    // render encoder is the one that last wrote to it (see state table above);
    // in that case the encoder would have to be split/ended first.
    const uint64_t lastWriterSerial = mBuffer->getLastWritingRenderEncoderSerial();
    return !contextMtl->isCurrentRenderEncoderSerial(lastWriterSerial);
}
// Copies the client data into a staging buffer and queues a GPU blit from the
// staging buffer into the existing buffer, without replacing mBuffer.
angle::Result BufferMtl::updateExistingBufferViaBlitFromStagingBuffer(ContextMtl *contextMtl,
                                                                      const uint8_t *srcPtr,
                                                                      size_t sizeToCopy,
                                                                      size_t offset)
{
    // Blit copies require 4-byte aligned offset and size.
    ASSERT(isOffsetAndSizeMetalBlitCompatible(offset, sizeToCopy));
    return contextMtl->getBufferManager().queueBlitCopyDataToBuffer(contextMtl, srcPtr, sizeToCopy,
                                                                    offset, mBuffer);
}
// * get a new or unused buffer
// * copy the new data to it
// * copy any old data not overwritten by the new data to the new buffer
// * start using the new buffer
angle::Result BufferMtl::putDataInNewBufferAndStartUsingNewBuffer(ContextMtl *contextMtl,
                                                                  const uint8_t *srcPtr,
                                                                  size_t sizeToCopy,
                                                                  size_t offset)
{
    // Caller guarantees 4-byte alignment so the blits below are legal.
    ASSERT(isOffsetAndSizeMetalBlitCompatible(offset, sizeToCopy));
    mtl::BufferManager &bufferManager = contextMtl->getBufferManager();
    // Keep a reference to the old buffer; it is still the source for any
    // data outside the region being updated.
    mtl::BufferRef oldBuffer = mBuffer;
    bool useSharedMem = useSharedMemory(contextMtl, mUsage);
    ANGLE_TRY(bufferManager.getBuffer(contextMtl, mGLSize, useSharedMem, mBuffer));
    // Label the buffer for debugging; mRevisionCount exists only for this.
    mBuffer->get().label = [NSString stringWithFormat:@"BufferMtl=%p(%lu)", this, ++mRevisionCount];
    // Write the new data into the fresh buffer via the CPU. mapWithOpt's
    // no-sync option is safe here because the buffer is new/unused.
    uint8_t *ptr = mBuffer->mapWithOpt(contextMtl, false, true);
    std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset);
    mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy);
    // If the update did not cover the whole buffer, blit the untouched
    // prefix and/or suffix over from the old buffer.
    if (offset > 0 || offset + sizeToCopy < mGLSize)
    {
        mtl::BlitCommandEncoder *blitEncoder =
            contextMtl->getBlitCommandEncoderWithoutEndingRenderEncoder();
        if (offset > 0)
        {
            // copy old data before updated region
            blitEncoder->copyBuffer(oldBuffer, 0, mBuffer, 0, offset);
        }
        if (offset + sizeToCopy < mGLSize)
        {
            // copy old data after updated region
            const size_t endOffset = offset + sizeToCopy;
            const size_t endSizeToCopy = mGLSize - endOffset;
            blitEncoder->copyBuffer(oldBuffer, endOffset, mBuffer, endOffset, endSizeToCopy);
        }
    }
    // Return the old buffer to the manager; it will not be reused until the
    // GPU is done with it.
    bufferManager.returnBuffer(contextMtl, oldBuffer);
    return angle::Result::Continue;
}
// Writes the client data directly into the current buffer with the CPU:
// map, copy the region, then flush only the updated subset.
angle::Result BufferMtl::copyDataToExistingBufferViaCPU(ContextMtl *contextMtl,
                                                        const uint8_t *srcPtr,
                                                        size_t sizeToCopy,
                                                        size_t offset)
{
    uint8_t *dstPtr = mBuffer->map(contextMtl);
    std::copy(srcPtr, srcPtr + sizeToCopy, dstPtr + offset);
    mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy);
    return angle::Result::Continue;
}
// Updates the client-side shadow copy with the new data and then pushes the
// whole shadow copy into a (new) GPU buffer. The three steps below are
// order-critical: the shadow must be synced from the GPU before the client
// write, and committed only after it.
angle::Result BufferMtl::updateShadowCopyThenCopyShadowToNewBuffer(ContextMtl *contextMtl,
                                                                   const uint8_t *srcPtr,
                                                                   size_t sizeToCopy,
                                                                   size_t offset)
{
    // 1. Before copying data from client, we need to synchronize modified data from GPU to
    // shadow copy first.
    ensureShadowCopySyncedFromGPU(contextMtl);

    // 2. Copy data from client to shadow copy.
    std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset);

    // 3. Copy data from shadow copy to GPU.
    return commitShadowCopy(contextMtl);
}
angle::Result BufferMtl::setSubDataImpl(const gl::Context *context,
const void *data,
size_t size,
@ -594,68 +724,72 @@ angle::Result BufferMtl::setSubDataImpl(const gl::Context *context,
ASSERT(mBuffer);
ContextMtl *contextMtl = mtl::GetImpl(context);
ContextMtl *contextMtl = mtl::GetImpl(context);
const angle::FeaturesMtl &features = contextMtl->getDisplay()->getFeatures();
ANGLE_MTL_TRY(contextMtl, offset <= mBuffer->size());
ANGLE_MTL_TRY(contextMtl, offset <= mGLSize);
auto srcPtr = static_cast<const uint8_t *>(data);
auto sizeToCopy = std::min<size_t>(size, mBuffer->size() - offset);
auto sizeToCopy = std::min<size_t>(size, mGLSize - offset);
markConversionBuffersDirty();
if (mBufferPool.getMaxBuffers() == 1)
if (features.preferCpuForBuffersubdata.enabled)
{
ASSERT(mBuffer);
uint8_t *ptr = mBuffer->map(contextMtl);
std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset);
mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy);
return copyDataToExistingBufferViaCPU(contextMtl, srcPtr, sizeToCopy, offset);
}
if (mShadowCopy.size() > 0)
{
return updateShadowCopyThenCopyShadowToNewBuffer(contextMtl, srcPtr, sizeToCopy, offset);
}
else
{
ASSERT(mShadowCopy.size());
bool alwaysUseStagedBufferUpdates = features.alwaysUseStagedBufferUpdates.enabled;
// 1. Before copying data from client, we need to synchronize modified data from GPU to
// shadow copy first.
ensureShadowCopySyncedFromGPU(contextMtl);
// 2. Copy data from client to shadow copy.
std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset);
// 3. Copy data from shadow copy to GPU.
ANGLE_TRY(commitShadowCopy(context));
if (isOffsetAndSizeMetalBlitCompatible(offset, size) &&
(alwaysUseStagedBufferUpdates || mBuffer->isBeingUsedByGPU(contextMtl)))
{
if (alwaysUseStagedBufferUpdates || !isSafeToReadFromBufferViaBlit(contextMtl))
{
// We can't use the buffer now so copy the data
// to a staging buffer and blit it in
return updateExistingBufferViaBlitFromStagingBuffer(contextMtl, srcPtr, sizeToCopy,
offset);
}
else
{
return putDataInNewBufferAndStartUsingNewBuffer(contextMtl, srcPtr, sizeToCopy,
offset);
}
}
else
{
return copyDataToExistingBufferViaCPU(contextMtl, srcPtr, sizeToCopy, offset);
}
}
return angle::Result::Continue;
}
angle::Result BufferMtl::commitShadowCopy(const gl::Context *context)
angle::Result BufferMtl::commitShadowCopy(ContextMtl *contextMtl)
{
return commitShadowCopy(context, size());
return commitShadowCopy(contextMtl, mGLSize);
}
angle::Result BufferMtl::commitShadowCopy(const gl::Context *context, size_t size)
angle::Result BufferMtl::commitShadowCopy(ContextMtl *contextMtl, size_t size)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
mtl::BufferManager &bufferManager = contextMtl->getBufferManager();
bool useSharedMem = useSharedMemory(contextMtl, mUsage);
if (!size)
{
// Skip mapping if size to commit is zero.
// zero size is passed to allocate buffer only.
ANGLE_TRY(mBufferPool.allocate(contextMtl, mShadowCopy.size(), nullptr, &mBuffer, nullptr,
nullptr));
}
else
{
uint8_t *ptr = nullptr;
mBufferPool.releaseInFlightBuffers(contextMtl);
ANGLE_TRY(
mBufferPool.allocate(contextMtl, mShadowCopy.size(), &ptr, &mBuffer, nullptr, nullptr));
bufferManager.returnBuffer(contextMtl, mBuffer);
ANGLE_TRY(bufferManager.getBuffer(contextMtl, mGLSize, useSharedMem, mBuffer));
if (size)
{
uint8_t *ptr = mBuffer->mapWithOpt(contextMtl, false, true);
std::copy(mShadowCopy.data(), mShadowCopy.data() + size, ptr);
mBuffer->unmapAndFlushSubset(contextMtl, 0, size);
}
ANGLE_TRY(mBufferPool.commit(contextMtl));
return angle::Result::Continue;
}

Просмотреть файл

@ -17,6 +17,7 @@
#include "libANGLE/Context.h"
#include "libANGLE/renderer/ContextImpl.h"
#include "libANGLE/renderer/metal/ProvokingVertexHelper.h"
#include "libANGLE/renderer/metal/mtl_buffer_manager.h"
#include "libANGLE/renderer/metal/mtl_buffer_pool.h"
#include "libANGLE/renderer/metal/mtl_command_buffer.h"
#include "libANGLE/renderer/metal/mtl_context_device.h"
@ -369,6 +370,7 @@ class ContextMtl : public ContextImpl, public mtl::Context
// Will end current command encoder and start new blit command encoder. Unless a blit comamnd
// encoder is already started.
mtl::BlitCommandEncoder *getBlitCommandEncoder();
// Will end current command encoder and start new compute command encoder. Unless a compute
// command encoder is already started.
mtl::ComputeCommandEncoder *getComputeCommandEncoder();
@ -381,6 +383,8 @@ class ContextMtl : public ContextImpl, public mtl::Context
// Get the provoking vertex command encoder.
mtl::ComputeCommandEncoder *getIndexPreprocessingCommandEncoder();
bool isCurrentRenderEncoderSerial(uint64_t serial);
const mtl::ContextDevice &getMetalDevice() const { return mContextDevice; }
angle::Result copy2DTextureSlice0Level0ToWorkTexture(const mtl::TextureRef &srcTexture);
@ -390,6 +394,7 @@ class ContextMtl : public ContextImpl, public mtl::Context
const mtl::MipmapNativeLevel &mipNativeLevel,
uint32_t layerIndex);
const mtl::BufferRef &getWorkBuffer() const { return mWorkBuffer; }
mtl::BufferManager &getBufferManager() { return mBufferManager; }
private:
void ensureCommandBufferReady();
@ -600,6 +605,8 @@ class ContextMtl : public ContextImpl, public mtl::Context
MTLCullMode mCullMode;
bool mCullAllPolygons = false;
mtl::BufferManager mBufferManager;
// Lineloop and TriFan index buffer
mtl::BufferPool mLineLoopIndexBuffer;
mtl::BufferPool mLineLoopLastSegmentIndexBuffer;

Просмотреть файл

@ -1663,6 +1663,11 @@ void ContextMtl::endRenderEncoding(mtl::RenderCommandEncoder *encoder)
disableActiveOcclusionQueryInRenderPass();
}
if (mBlitEncoder.valid())
{
mBlitEncoder.endEncoding();
}
encoder->endEncoding();
// Resolve visibility results
@ -1755,6 +1760,16 @@ bool ContextMtl::hasStartedRenderPass(const mtl::RenderPassDesc &desc)
mRenderEncoder.renderPassDesc().equalIgnoreLoadStoreOptions(desc);
}
bool ContextMtl::isCurrentRenderEncoderSerial(uint64_t serial)
{
    // A serial can only match the "current" render encoder if one is open.
    return mRenderEncoder.valid() && serial == mRenderEncoder.getSerial();
}
// Get current render encoder
mtl::RenderCommandEncoder *ContextMtl::getRenderCommandEncoder()
{
@ -1856,6 +1871,11 @@ mtl::RenderCommandEncoder *ContextMtl::getRenderTargetCommandEncoder(
mtl::BlitCommandEncoder *ContextMtl::getBlitCommandEncoder()
{
if (mRenderEncoder.valid() || mComputeEncoder.valid())
{
endEncoding(true);
}
if (mBlitEncoder.valid())
{
return &mBlitEncoder;
@ -1882,6 +1902,11 @@ mtl::BlitCommandEncoder *ContextMtl::getBlitCommandEncoderWithoutEndingRenderEnc
mtl::ComputeCommandEncoder *ContextMtl::getComputeCommandEncoder()
{
if (mRenderEncoder.valid() || mBlitEncoder.valid())
{
endEncoding(true);
}
if (mComputeEncoder.valid())
{
return &mComputeEncoder;
@ -2725,7 +2750,8 @@ angle::Result ContextMtl::copyTextureSliceLevelToWorkBuffer(
// Expand the buffer if it is not big enough.
if (!mWorkBuffer || mWorkBuffer->size() < sizeInBytes)
{
ANGLE_TRY(mtl::Buffer::MakeBuffer(this, sizeInBytes, nullptr, &mWorkBuffer));
ANGLE_TRY(mtl::Buffer::MakeBufferWithSharedMemOpt(this, true, sizeInBytes, nullptr,
&mWorkBuffer));
}
gl::Rectangle region(0, 0, width, height);

Просмотреть файл

@ -1193,6 +1193,16 @@ void DisplayMtl::initializeFeatures()
ANGLE_FEATURE_CONDITION((&mFeatures), preemptivelyStartProvokingVertexCommandBuffer, isAMD());
ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseStagedBufferUpdates, isAMD());
ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseManagedStorageModeForBuffers, isAMD());
ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseSharedStorageModeForBuffers, isIntel());
ANGLE_FEATURE_CONDITION((&mFeatures), useShadowBuffersWhenAppropriate, isIntel());
// At least one of these must not be set.
ASSERT(!mFeatures.alwaysUseManagedStorageModeForBuffers.enabled ||
!mFeatures.alwaysUseSharedStorageModeForBuffers.enabled);
bool defaultDirectToMetal = true;
ANGLE_FEATURE_CONDITION((&mFeatures), directMetalGeneration, defaultDirectToMetal);

Просмотреть файл

@ -1611,6 +1611,7 @@ angle::Result FramebufferMtl::readPixelsImpl(const gl::Context *context,
return result;
}
if (texture->isBeingUsedByGPU(contextMtl))
{
contextMtl->flushCommandBuffer(mtl::WaitUntilFinished);

Просмотреть файл

@ -1460,7 +1460,7 @@ angle::Result ProgramMtl::legalizeUniformBufferOffsets(
// Has the content of the buffer has changed since last conversion?
if (conversion->dirty)
{
const uint8_t *srcBytes = bufferMtl->getClientShadowCopyData(context);
const uint8_t *srcBytes = bufferMtl->getBufferDataReadOnly(context);
srcBytes += offsetModulo;
size_t sizeToCopy = bufferMtl->size() - offsetModulo;
size_t bytesToAllocate = roundUp<size_t>(sizeToCopy, 16u);

Просмотреть файл

@ -1817,7 +1817,7 @@ angle::Result TextureMtl::setPerSliceSubImage(const gl::Context *context,
{
// NOTE(hqle): packed depth & stencil texture cannot copy from buffer directly, needs
// to split its depth & stencil data and copy separately.
const uint8_t *clientData = unpackBufferMtl->getClientShadowCopyData(contextMtl);
const uint8_t *clientData = unpackBufferMtl->getBufferDataReadOnly(contextMtl);
clientData += offset;
ANGLE_TRY(UploadTextureContents(context, mFormat.actualAngleFormat(), mtlArea,
mtl::kZeroNativeMipLevel, slice, clientData,
@ -1871,7 +1871,7 @@ angle::Result TextureMtl::convertAndSetPerSliceSubImage(const gl::Context *conte
mFormat.intendedAngleFormat().isBlock)
{
// Unsupported format, use CPU path.
const uint8_t *clientData = unpackBufferMtl->getClientShadowCopyData(contextMtl);
const uint8_t *clientData = unpackBufferMtl->getBufferDataReadOnly(contextMtl);
clientData += offset;
ANGLE_TRY(convertAndSetPerSliceSubImage(context, slice, mtlArea, internalFormat, type,
pixelsAngleFormat, pixelsRowPitch,

Просмотреть файл

@ -881,7 +881,7 @@ angle::Result VertexArrayMtl::convertIndexBuffer(const gl::Context *glContext,
{
// We shouldn't use GPU to convert when we are in a middle of a render pass.
ANGLE_TRY(StreamIndexData(contextMtl, &conversion->data,
idxBuffer->getClientShadowCopyData(contextMtl) + offsetModulo,
idxBuffer->getBufferDataReadOnly(contextMtl) + offsetModulo,
indexType, indexCount, glState.isPrimitiveRestartEnabled(),
&conversion->convertedBuffer, &conversion->convertedOffset));
}
@ -1061,7 +1061,7 @@ angle::Result VertexArrayMtl::convertVertexBufferCPU(ContextMtl *contextMtl,
ConversionBufferMtl *conversion)
{
const uint8_t *srcBytes = srcBuffer->getClientShadowCopyData(contextMtl);
const uint8_t *srcBytes = srcBuffer->getBufferDataReadOnly(contextMtl);
ANGLE_CHECK_GL_ALLOC(contextMtl, srcBytes);
VertexConversionBufferMtl *vertexConverison =
static_cast<VertexConversionBufferMtl *>(conversion);
@ -1151,4 +1151,4 @@ angle::Result VertexArrayMtl::convertVertexBufferGPU(const gl::Context *glContex
return angle::Result::Continue;
}
}
} // namespace rx

Просмотреть файл

@ -0,0 +1,94 @@
//
// Copyright 2022 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// mtl_buffer_manager.h:
// BufferManager manages buffers across all contexts for a single
// device.
//
#ifndef LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_
#define LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_
#include "common/FixedVector.h"
#include "libANGLE/renderer/metal/mtl_resources.h"
#include <vector>
namespace rx
{
class ContextMtl;
namespace mtl
{
// GL buffers are backed by Metal buffers. Which metal
// buffer is backing a particular GL buffer is fluid.
// The case being optimized is a loop of something like
//
// for 1..4
// glBufferSubData
// glDrawXXX
//
// You can't update a buffer in the middle of a render pass
// in Metal, so instead we'd end up using multiple buffers.
//
// Simple case, the call to `glBufferSubData` updates the
// entire buffer. In this case we'd end up with each call
// to `glBufferSubData` getting a new buffer from this
// BufferManager and copying the new data to it. We'd
// end up submitting this renderpass
//
// draw with buf1
// draw with buf2
// draw with buf3
// draw with buf4
//
// The GL buffer now references buf4. And buf1, buf2, buf3 and
// buf0 (the buffer that was previously referenced by the GL buffer)
// are all added to the in-use list.
//
// This macro enables showing the running totals of the various
// buckets of unused buffers.
// #define ANGLE_MTL_TRACK_BUFFER_MEM
class BufferManager
{
public:
BufferManager();
static constexpr size_t kMaxStagingBufferSize = 1024 * 1024;
static constexpr size_t kMaxSizePowerOf2 = 64;
angle::Result queueBlitCopyDataToBuffer(ContextMtl *contextMtl,
const void *srcPtr,
size_t sizeToCopy,
size_t offset,
mtl::BufferRef &dstMetalBuffer);
angle::Result getBuffer(ContextMtl *contextMtl,
size_t size,
bool useSharedMem,
mtl::BufferRef &bufferRef);
void returnBuffer(ContextMtl *contextMtl, mtl::BufferRef &bufferRef);
private:
typedef std::vector<mtl::BufferRef> BufferList;
void freeUnusedBuffers(ContextMtl *contextMtl);
void addBufferRefToFreeLists(mtl::BufferRef &bufferRef);
BufferList mInUseBuffers;
angle::FixedVector<BufferList, kMaxSizePowerOf2> mFreeBuffers[2];
#ifdef ANGLE_MTL_TRACK_BUFFER_MEM
angle::FixedVector<size_t, kMaxSizePowerOf2> mAllocations;
size_t mTotalMem = 0;
#endif
};
} // namespace mtl
} // namespace rx
#endif /* LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_ */

Просмотреть файл

@ -0,0 +1,202 @@
//
// Copyright 2022 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// mtl_buffer_manager.mm:
// Implements the class methods for BufferManager.
//
#include "libANGLE/renderer/metal/mtl_buffer_manager.h"
#include "libANGLE/renderer/metal/ContextMtl.h"
#include "libANGLE/renderer/metal/DisplayMtl.h"
namespace rx
{
namespace mtl
{
namespace
{
// Floor of log2(num); Log2(0) and Log2(1) are both 0.
constexpr size_t Log2(size_t num)
{
    size_t result = 0;
    while (num > 1)
    {
        num /= 2;
        ++result;
    }
    return result;
}

// Smallest p such that 2^p >= num (for num >= 1).
constexpr size_t Log2Ceil(size_t num)
{
    const size_t floorLog = Log2(num);
    return (size_t(1) << floorLog) == num ? floorLog : floorLog + 1;
}
#ifdef ANGLE_MTL_TRACK_BUFFER_MEM
// Unit label matching memUnitValue(): bytes below 2^10, then k, M, G.
const char *memUnitSuffix(size_t powerOf2)
{
    if (powerOf2 >= 30)
    {
        return "G";
    }
    if (powerOf2 >= 20)
    {
        return "M";
    }
    return powerOf2 >= 10 ? "k" : "b";
}
// Returns 2^powerOf2 bytes expressed in the unit chosen by memUnitSuffix()
// (b/k/M/G). Shift a size_t, not an unsigned int: powerOf2 can be up to
// kMaxSizePowerOf2 - 1 == 63, and "1u << (powerOf2 - 30)" would shift a
// 32-bit value by >= 32 bits for buckets of 2^62 and up, which is undefined
// behavior.
size_t memUnitValue(size_t powerOf2)
{
    if (powerOf2 < 10)
    {
        return size_t(1) << powerOf2;
    }
    if (powerOf2 < 20)
    {
        return size_t(1) << (powerOf2 - 10);
    }
    if (powerOf2 < 30)
    {
        return size_t(1) << (powerOf2 - 20);
    }
    return size_t(1) << (powerOf2 - 30);
}
#endif // ANGLE_MTL_TRACK_BUFFER_MEM
// Index into the two-entry free-list array: 0 = managed/private, 1 = shared.
int sharedMemToIndex(bool useSharedMem)
{
    return static_cast<int>(useSharedMem);
}
} // namespace
// Default construction; the free/in-use lists start empty.
BufferManager::BufferManager()
#ifdef ANGLE_MTL_TRACK_BUFFER_MEM
    // Zero-initialize one allocation counter per power-of-2 size bucket.
    : mAllocations(kMaxSizePowerOf2, 0)
#endif
{}
// Moves every buffer the GPU has finished with from mInUseBuffers to the
// free lists. Buffers after the first still-in-use entry are left alone
// (NOTE(review): this assumes entries earlier in the list retire no later
// than entries after them — confirm against returnBuffer's append order).
void BufferManager::freeUnusedBuffers(ContextMtl *contextMtl)
{
    // Scan for the first buffer still in use. Take each element by const
    // reference: BufferRef is ref-counted, and the original by-value `auto`
    // parameter copied it (an atomic inc/dec) on every probe.
    BufferList::iterator firstInUseIter =
        std::find_if(mInUseBuffers.begin(), mInUseBuffers.end(),
                     [contextMtl](const auto &ref) { return ref->isBeingUsedByGPU(contextMtl); });

    // Move unused buffers to the free lists
    for (BufferList::iterator it = mInUseBuffers.begin(); it != firstInUseIter; ++it)
    {
        addBufferRefToFreeLists(*it);
    }

    mInUseBuffers.erase(mInUseBuffers.begin(), firstInUseIter);
}
// Files bufferRef into the free-list bucket matching its size class and
// storage mode.
void BufferManager::addBufferRefToFreeLists(mtl::BufferRef &bufferRef)
{
    const size_t sizeBucket = Log2Ceil(bufferRef->size());
    ASSERT(sizeBucket < kMaxSizePowerOf2);
    const bool isShared = bufferRef->get().storageMode == MTLStorageModeShared;
    mFreeBuffers[sharedMemToIndex(isShared)][sizeBucket].push_back(bufferRef);
}
// Accepts a buffer back from a caller. Buffers the GPU has already retired
// go straight to the free lists; the rest wait on the in-use list.
void BufferManager::returnBuffer(ContextMtl *contextMtl, BufferRef &bufferRef)
{
    const bool gpuStillUsing = bufferRef->isBeingUsedByGPU(contextMtl);
    if (!gpuStillUsing)
    {
        addBufferRefToFreeLists(bufferRef);
        return;
    }
    mInUseBuffers.push_back(bufferRef);
}
// Returns (via bufferRef) a buffer of at least `size` bytes with the
// requested storage mode, preferring a recycled buffer from the matching
// size bucket over a fresh allocation.
angle::Result BufferManager::getBuffer(ContextMtl *contextMtl,
                                       size_t size,
                                       bool useSharedMem,
                                       BufferRef &bufferRef)
{
    // Reclaim anything the GPU has finished with before checking free lists.
    freeUnusedBuffers(contextMtl);

    const size_t bucketNdx = Log2Ceil(size);
    // Mirror the bounds check done in addBufferRefToFreeLists().
    ASSERT(bucketNdx < kMaxSizePowerOf2);
    const int sharedNdx      = sharedMemToIndex(useSharedMem);
    BufferList &freeBuffers  = mFreeBuffers[sharedNdx][bucketNdx];

    // If there are free buffers grab one
    if (!freeBuffers.empty())
    {
        bufferRef = freeBuffers.back();
        freeBuffers.pop_back();
        return angle::Result::Continue;
    }

    // Create a new one, rounded up to the bucket's power-of-2 size so it can
    // satisfy any later request that maps to the same bucket.
    mtl::BufferRef newBufferRef;
    size_t allocSize = size_t(1) << bucketNdx;
    ASSERT(allocSize >= size);
    ANGLE_TRY(mtl::Buffer::MakeBufferWithSharedMemOpt(contextMtl, useSharedMem, allocSize, nullptr,
                                                      &newBufferRef));

#ifdef ANGLE_MTL_TRACK_BUFFER_MEM
    {
        mTotalMem += allocSize;
        mAllocations[bucketNdx]++;
        fprintf(stderr, "totalMem: %zu, ", mTotalMem);
        size_t numBuffers = 0;
        for (size_t i = 0; i < kMaxSizePowerOf2; ++i)
        {
            if (mAllocations[i])
            {
                numBuffers += mAllocations[i];
                fprintf(stderr, "%zu%s: %zu, ", memUnitValue(i), memUnitSuffix(i), mAllocations[i]);
            }
        }
        fprintf(stderr, " total: %zu\n", numBuffers);
    }
#endif

    bufferRef = std::move(newBufferRef);
    return angle::Result::Continue;
}
// Copies sizeToCopy bytes from srcPtr into dstMetalBuffer at `offset`
// without touching the destination from the CPU: data is written into
// temporary staging buffers and blitted to the destination on the GPU.
angle::Result BufferManager::queueBlitCopyDataToBuffer(ContextMtl *contextMtl,
                                                       const void *srcPtr,
                                                       size_t sizeToCopy,
                                                       size_t offset,
                                                       mtl::BufferRef &dstMetalBuffer)
{
    // Use the portable uint8_t (as the rest of this file does) rather than
    // the Apple MacTypes `uint8` typedef.
    const uint8_t *src = reinterpret_cast<const uint8_t *>(srcPtr);
    bool useShared =
        !contextMtl->getDisplay()->getFeatures().alwaysUseManagedStorageModeForBuffers.enabled;

    // Chunk the upload so one huge copy never demands a single huge staging
    // allocation.
    for (size_t srcOffset = 0; srcOffset < sizeToCopy; srcOffset += kMaxStagingBufferSize)
    {
        size_t subSizeToCopy = std::min(kMaxStagingBufferSize, sizeToCopy - srcOffset);

        mtl::BufferRef bufferRef;
        ANGLE_TRY(getBuffer(contextMtl, subSizeToCopy, useShared, bufferRef));

        // copy data to staging buffer
        uint8_t *ptr = bufferRef->mapWithOpt(contextMtl, false, true);
        std::copy(src + srcOffset, src + srcOffset + subSizeToCopy, ptr);
        bufferRef->unmapAndFlushSubset(contextMtl, 0, subSizeToCopy);

        // queue a GPU blit from the staging buffer into the destination
        mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder();
        blitEncoder->copyBuffer(bufferRef, 0, dstMetalBuffer, offset + srcOffset, subSizeToCopy);

        // The queued blit still references the staging buffer; returnBuffer()
        // keeps it on the in-use list until the GPU retires it.
        returnBuffer(contextMtl, bufferRef);
    }
    return angle::Result::Continue;
}
} // namespace mtl
} // namespace rx

Просмотреть файл

@ -80,6 +80,8 @@ class CommandQueue final : public WrappedObject<id<MTLCommandQueue>>, angle::Non
AutoObjCPtr<id<MTLCommandBuffer>> makeMetalCommandBuffer(uint64_t *queueSerialOut);
void onCommandBufferCommitted(id<MTLCommandBuffer> buf, uint64_t serial);
uint64_t getNextRenderEncoderSerial();
private:
void onCommandBufferCompleted(id<MTLCommandBuffer> buf, uint64_t serial);
using ParentClass = WrappedObject<id<MTLCommandQueue>>;
@ -94,6 +96,7 @@ class CommandQueue final : public WrappedObject<id<MTLCommandQueue>>, angle::Non
uint64_t mQueueSerialCounter = 1;
std::atomic<uint64_t> mCommittedBufferSerial{0};
std::atomic<uint64_t> mCompletedBufferSerial{0};
uint64_t mRenderEncoderCounter = 1;
mutable std::mutex mLock;
};
@ -497,6 +500,8 @@ class RenderCommandEncoder final : public CommandEncoder
const RenderPassDesc &renderPassDesc() const { return mRenderPassDesc; }
bool hasDrawCalls() const { return mHasDrawCalls; }
uint64_t getSerial() const { return mSerial; }
private:
// Override CommandEncoder
id<MTLRenderCommandEncoder> get()
@ -541,6 +546,7 @@ class RenderCommandEncoder final : public CommandEncoder
RenderCommandEncoderStates mStateCache = {};
bool mPipelineStateSet = false;
const uint64_t mSerial = 0;
};
class BlitCommandEncoder final : public CommandEncoder

Просмотреть файл

@ -11,6 +11,7 @@
#include "libANGLE/renderer/metal/mtl_command_buffer.h"
#include <cassert>
#include <cstdint>
#if ANGLE_MTL_SIMULATE_DISCARD_FRAMEBUFFER
# include <random>
#endif
@ -579,6 +580,11 @@ void CommandQueue::onCommandBufferCompleted(id<MTLCommandBuffer> buf, uint64_t s
std::memory_order_relaxed);
}
uint64_t CommandQueue::getNextRenderEncoderSerial()
{
return ++mRenderEncoderCounter;
}
// CommandBuffer implementation
CommandBuffer::CommandBuffer(CommandQueue *cmdQueue) : mCmdQueue(*cmdQueue) {}
@ -1065,7 +1071,9 @@ void RenderCommandEncoderStates::reset()
// RenderCommandEncoder implemtation
RenderCommandEncoder::RenderCommandEncoder(CommandBuffer *cmdBuffer,
const OcclusionQueryPool &queryPool)
: CommandEncoder(cmdBuffer, RENDER), mOcclusionQueryPool(queryPool)
: CommandEncoder(cmdBuffer, RENDER),
mOcclusionQueryPool(queryPool),
mSerial(cmdBuffer->cmdQueue().getNextRenderEncoderSerial())
{
ANGLE_MTL_OBJC_SCOPE
{
@ -1556,6 +1564,7 @@ RenderCommandEncoder &RenderCommandEncoder::setBufferForWrite(gl::ShaderType sha
return *this;
}
buffer->setLastWritingRenderEncoderSerial(mSerial);
cmdBuffer().setWriteDependency(buffer);
id<MTLBuffer> mtlBuffer = (buffer ? buffer->get() : nil);
@ -2183,10 +2192,14 @@ BlitCommandEncoder &BlitCommandEncoder::synchronizeResource(Buffer *buffer)
}
#if TARGET_OS_OSX || TARGET_OS_MACCATALYST
// Only MacOS has separated storage for resource on CPU and GPU and needs explicit
// synchronization
cmdBuffer().setReadDependency(buffer);
[get() synchronizeResource:buffer->get()];
if (buffer->get().storageMode == MTLStorageModeManaged)
{
// Only MacOS has separated storage for resource on CPU and GPU and needs explicit
// synchronization
cmdBuffer().setReadDependency(buffer);
[get() synchronizeResource:buffer->get()];
}
#endif
return *this;
}

Просмотреть файл

@ -106,7 +106,7 @@ constexpr uint32_t kMaxShaderXFBs = gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_SE
// The max size of a buffer that will be allocated in shared memory.
// NOTE(hqle): This is just a hint. There is no official document on what is the max allowed size
// for shared memory.
constexpr size_t kSharedMemBufferMaxBufSizeHint = 128 * 1024;
constexpr size_t kSharedMemBufferMaxBufSizeHint = 256 * 1024;
constexpr size_t kDefaultAttributeSize = 4 * sizeof(float);

Просмотреть файл

@ -2094,8 +2094,7 @@ angle::Result IndexGeneratorUtils::generateTriFanBufferFromElementsArray(
contextMtl->getRenderCommandEncoder()))
{
IndexGenerationParams cpuPathParams = params;
cpuPathParams.indices =
elementBufferMtl->getClientShadowCopyData(contextMtl) + srcOffset;
cpuPathParams.indices = elementBufferMtl->getBufferDataReadOnly(contextMtl) + srcOffset;
return generateTriFanBufferFromElementsArrayCPU(contextMtl, cpuPathParams,
indicesGenerated);
}
@ -2223,8 +2222,7 @@ angle::Result IndexGeneratorUtils::generateLineLoopBufferFromElementsArray(
contextMtl->getRenderCommandEncoder()))
{
IndexGenerationParams cpuPathParams = params;
cpuPathParams.indices =
elementBufferMtl->getClientShadowCopyData(contextMtl) + srcOffset;
cpuPathParams.indices = elementBufferMtl->getBufferDataReadOnly(contextMtl) + srcOffset;
return generateLineLoopBufferFromElementsArrayCPU(contextMtl, cpuPathParams,
indicesGenerated);
}

Просмотреть файл

@ -56,6 +56,7 @@ class Resource : angle::NonCopyable
bool hasPendingWorks(Context *context) const;
void setUsedByCommandBufferWithQueueSerial(uint64_t serial, bool writing);
void setWrittenToByRenderEncoder(uint64_t serial);
uint64_t getCommandBufferQueueSerial() const { return mUsageRef->cmdBufferQueueSerial; }
@ -71,6 +72,15 @@ class Resource : angle::NonCopyable
bool isCPUReadMemDirty() const { return mUsageRef->cpuReadMemDirty; }
void resetCPUReadMemDirty() { mUsageRef->cpuReadMemDirty = false; }
bool getLastWritingRenderEncoderSerial() const
{
return mUsageRef->lastWritingRenderEncoderSerial;
}
void setLastWritingRenderEncoderSerial(uint64_t serial) const
{
mUsageRef->lastWritingRenderEncoderSerial = serial;
}
virtual size_t estimatedByteSize() const = 0;
virtual id getID() const = 0;
@ -98,6 +108,9 @@ class Resource : angle::NonCopyable
// This flag is useful for BufferMtl to know whether it should update the shadow copy
bool cpuReadMemDirty = false;
// The id of the last render encoder to write to this resource
uint64_t lastWritingRenderEncoderSerial = 0;
};
// One resource object might just be a view of another resource. For example, a texture 2d

Просмотреть файл

@ -1054,8 +1054,9 @@ void Buffer::flush(ContextMtl *context, size_t offsetWritten, size_t sizeWritten
{
if (get().storageMode == MTLStorageModeManaged)
{
size_t startOffset = std::min(offsetWritten, size());
size_t endOffset = std::min(offsetWritten + sizeWritten, size());
size_t bufferSize = size();
size_t startOffset = std::min(offsetWritten, bufferSize);
size_t endOffset = std::min(offsetWritten + sizeWritten, bufferSize);
size_t clampedSize = endOffset - startOffset;
if (clampedSize > 0)
{

Просмотреть файл

@ -1055,6 +1055,149 @@ TEST_P(BufferDataTestES3, NoBufferInitDataCopyBug)
ASSERT_GL_NO_ERROR();
}
// This a shortened version of dEQP functional.buffer.copy.basic.array_copy_read. It provoked
// a bug in copyBufferSubData. The bug appeared to be that conversion buffers were not marked
// as dirty and therefore after copyBufferSubData the next draw call using the buffer that
// just had data copied to it was not re-converted. It's not clear to me how this ever worked
// or why changes to bufferSubData from
// https://chromium-review.googlesource.com/c/angle/angle/+/3842641 made this issue appear and
// why it wasn't already broken.
TEST_P(BufferDataTestES3, CopyBufferSubDataDraw)
{
    // Minimal pass-through program: positions from one attribute, color from another.
    const char simpleVertex[] = R"(attribute vec2 position;
attribute vec4 color;
varying vec4 vColor;
void main()
{
    gl_Position = vec4(position, 0, 1);
    vColor = color;
}
)";
    const char simpleFragment[] = R"(precision mediump float;
varying vec4 vColor;
void main()
{
    gl_FragColor = vColor;
}
)";
    ANGLE_GL_PROGRAM(program, simpleVertex, simpleFragment);
    glUseProgram(program);
    GLint colorLoc = glGetAttribLocation(program, "color");
    ASSERT_NE(-1, colorLoc);
    GLint posLoc = glGetAttribLocation(program, "position");
    ASSERT_NE(-1, posLoc);
    glClearColor(0, 0, 0, 0);
    GLBuffer srcBuffer;  // green
    GLBuffer dstBuffer;  // red
    // NOTE(review): 399 (not a multiple of 4) comes from the reduced dEQP case.
    constexpr size_t numElements = 399;
    std::vector<GLColorRGB> reds(numElements, GLColorRGB::red);
    std::vector<GLColorRGB> greens(numElements, GLColorRGB::green);
    constexpr size_t sizeOfElem = sizeof(decltype(greens)::value_type);
    constexpr size_t sizeInBytes = numElements * sizeOfElem;
    // Fill srcBuffer with all-green colors and dstBuffer with all-red colors.
    glBindBuffer(GL_ARRAY_BUFFER, srcBuffer);
    glBufferData(GL_ARRAY_BUFFER, sizeInBytes, greens.data(), GL_STREAM_DRAW);
    glBindBuffer(GL_COPY_READ_BUFFER, dstBuffer);
    glBufferData(GL_COPY_READ_BUFFER, sizeInBytes, reds.data(), GL_STREAM_DRAW);
    ASSERT_GL_NO_ERROR();
    constexpr size_t numQuads = numElements / 4;
    // Generate quads that fill clip space to use all the vertex colors
    std::vector<float> positions(numQuads * 4 * 2);
    for (size_t quad = 0; quad < numQuads; ++quad)
    {
        size_t offset = quad * 4 * 2;
        // Each quad is a vertical strip; together the strips span x in [-1, 1].
        float x0 = float(quad + 0) / numQuads * 2.0f - 1.0f;
        float x1 = float(quad + 1) / numQuads * 2.0f - 1.0f;
        /*
           2--3
           |  |
           0--1
        */
        positions[offset + 0] = x0;
        positions[offset + 1] = -1;
        positions[offset + 2] = x1;
        positions[offset + 3] = -1;
        positions[offset + 4] = x0;
        positions[offset + 5] = 1;
        positions[offset + 6] = x1;
        positions[offset + 7] = 1;
    }
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glEnableVertexAttribArray(posLoc);
    // Positions come from client memory; only the colors read the GL buffers.
    glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, positions.data());
    ASSERT_GL_NO_ERROR();
    glBindBuffer(GL_ARRAY_BUFFER, srcBuffer);
    glEnableVertexAttribArray(colorLoc);
    glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr);
    ASSERT_GL_NO_ERROR();
    glClear(GL_COLOR_BUFFER_BIT);
    // Two triangles, (0,1,2) and (2,1,3), per quad.
    std::vector<GLushort> indices(numQuads * 6);
    for (size_t quad = 0; quad < numQuads; ++quad)
    {
        size_t ndx = quad * 4;
        size_t offset = quad * 6;
        indices[offset + 0] = ndx;
        indices[offset + 1] = ndx + 1;
        indices[offset + 2] = ndx + 2;
        indices[offset + 3] = ndx + 2;
        indices[offset + 4] = ndx + 1;
        indices[offset + 5] = ndx + 3;
    }
    GLBuffer indexBuffer;
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexBuffer);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(decltype(indices)::value_type),
                 indices.data(), GL_STATIC_DRAW);
    // Draw with srcBuffer (green)
    glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0);
    EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green);
    ASSERT_GL_NO_ERROR();
    // Draw with dstBuffer (red)
    glBindBuffer(GL_ARRAY_BUFFER, dstBuffer);
    glEnableVertexAttribArray(colorLoc);
    glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr);
    glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0);
    EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::red);
    ASSERT_GL_NO_ERROR();
    // Copy src to dst. Yes, we're using GL_COPY_READ_BUFFER as dest because that's what the dEQP
    // test was testing.
    glBindBuffer(GL_ARRAY_BUFFER, srcBuffer);
    glBindBuffer(GL_COPY_READ_BUFFER, dstBuffer);
    glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_READ_BUFFER, 0, 0, sizeInBytes);
    ASSERT_GL_NO_ERROR();
    // Draw with srcBuffer. It should still be green.
    glBindBuffer(GL_ARRAY_BUFFER, srcBuffer);
    glEnableVertexAttribArray(colorLoc);
    glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr);
    glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0);
    EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green);
    ASSERT_GL_NO_ERROR();
    // Draw with dstBuffer. It should now be green too.
    // Per the comment above the test, this is the draw that exposed the
    // stale-conversion-buffer bug after glCopyBufferSubData.
    glBindBuffer(GL_ARRAY_BUFFER, dstBuffer);
    glEnableVertexAttribArray(colorLoc);
    glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr);
    glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0);
    EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green);
    ASSERT_GL_NO_ERROR();
}
// Ensures that calling glBufferData on a mapped buffer results in an unmapped buffer
TEST_P(BufferDataTestES3, BufferDataUnmap)
{

Просмотреть файл

@ -38,6 +38,9 @@ constexpr PackedEnumMap<Feature, const char *> kFeatureNames = {{
"allowTranslateUniformBlockToStructuredBuffer"},
{Feature::AlwaysCallUseProgramAfterLink, "alwaysCallUseProgramAfterLink"},
{Feature::AlwaysUnbindFramebufferTexture2D, "alwaysUnbindFramebufferTexture2D"},
{Feature::AlwaysUseManagedStorageModeForBuffers, "alwaysUseManagedStorageModeForBuffers"},
{Feature::AlwaysUseSharedStorageModeForBuffers, "alwaysUseSharedStorageModeForBuffers"},
{Feature::AlwaysUseStagedBufferUpdates, "alwaysUseStagedBufferUpdates"},
{Feature::AsyncCommandQueue, "asyncCommandQueue"},
{Feature::Avoid1BitAlphaTextureFormats, "avoid1BitAlphaTextureFormats"},
{Feature::BindTransformFeedbackBufferBeforeBindBufferRange,
@ -187,6 +190,7 @@ constexpr PackedEnumMap<Feature, const char *> kFeatureNames = {{
{Feature::PreemptivelyStartProvokingVertexCommandBuffer,
"preemptivelyStartProvokingVertexCommandBuffer"},
{Feature::PreferAggregateBarrierCalls, "preferAggregateBarrierCalls"},
{Feature::PreferCpuForBuffersubdata, "preferCpuForBuffersubdata"},
{Feature::PreferCPUForBufferSubData, "preferCPUForBufferSubData"},
{Feature::PreferDeviceLocalMemoryHostVisible, "preferDeviceLocalMemoryHostVisible"},
{Feature::PreferDrawClearOverVkCmdClearAttachments, "preferDrawClearOverVkCmdClearAttachments"},
@ -317,6 +321,7 @@ constexpr PackedEnumMap<Feature, const char *> kFeatureNames = {{
{Feature::UseInstancedPointSpriteEmulation, "useInstancedPointSpriteEmulation"},
{Feature::UseMultipleDescriptorsForExternalFormats, "useMultipleDescriptorsForExternalFormats"},
{Feature::UseNonZeroStencilWriteMaskStaticState, "useNonZeroStencilWriteMaskStaticState"},
{Feature::UseShadowBuffersWhenAppropriate, "useShadowBuffersWhenAppropriate"},
{Feature::UseSystemMemoryForConstantBuffers, "useSystemMemoryForConstantBuffers"},
{Feature::UseUnusedBlocksWithStandardOrSharedLayout,
"useUnusedBlocksWithStandardOrSharedLayout"},

Просмотреть файл

@ -37,6 +37,9 @@ enum class Feature
AllowTranslateUniformBlockToStructuredBuffer,
AlwaysCallUseProgramAfterLink,
AlwaysUnbindFramebufferTexture2D,
AlwaysUseManagedStorageModeForBuffers,
AlwaysUseSharedStorageModeForBuffers,
AlwaysUseStagedBufferUpdates,
AsyncCommandQueue,
Avoid1BitAlphaTextureFormats,
BindTransformFeedbackBufferBeforeBindBufferRange,
@ -176,6 +179,7 @@ enum class Feature
PrecisionSafeDivision,
PreemptivelyStartProvokingVertexCommandBuffer,
PreferAggregateBarrierCalls,
PreferCpuForBuffersubdata,
PreferCPUForBufferSubData,
PreferDeviceLocalMemoryHostVisible,
PreferDrawClearOverVkCmdClearAttachments,
@ -293,6 +297,7 @@ enum class Feature
UseInstancedPointSpriteEmulation,
UseMultipleDescriptorsForExternalFormats,
UseNonZeroStencilWriteMaskStaticState,
UseShadowBuffersWhenAppropriate,
UseSystemMemoryForConstantBuffers,
UseUnusedBlocksWithStandardOrSharedLayout,
VertexIDDoesNotIncludeBaseVertex,