diff --git a/include/platform/FeaturesMtl_autogen.h b/include/platform/FeaturesMtl_autogen.h index 116a0ee8d..fc93237d2 100644 --- a/include/platform/FeaturesMtl_autogen.h +++ b/include/platform/FeaturesMtl_autogen.h @@ -232,6 +232,31 @@ struct FeaturesMtl : FeatureSetBase "uploadDataToIosurfacesWithStagingBuffers", FeatureCategory::MetalWorkarounds, "When uploading data to IOSurface-backed textures, use a staging buffer.", &members, "http://anglebug.com/7573"}; + + FeatureInfo alwaysUseStagedBufferUpdates = { + "alwaysUseStagedBufferUpdates", FeatureCategory::MetalFeatures, + "Always update buffers by copying the data to a staging buffer and then blitting it to the " + "actual buffer", + &members, "http://anglebug.com/7544"}; + + FeatureInfo useShadowBuffersWhenAppropriate = { + "useShadowBuffersWhenAppropriate", FeatureCategory::MetalFeatures, + "On some architectures using a shadow buffer can be faster for certain size buffers", + &members, "http://anglebug.com/7544"}; + + FeatureInfo alwaysUseManagedStorageModeForBuffers = { + "alwaysUseManagedStorageModeForBuffers", FeatureCategory::MetalFeatures, + "Metal buffers can be managed, shared, or private. Sometimes managed is fastest", &members, + "http://anglebug.com/7544"}; + + FeatureInfo alwaysUseSharedStorageModeForBuffers = { + "alwaysUseSharedStorageModeForBuffers", FeatureCategory::MetalFeatures, + "Metal buffers can be managed, shared, or private. 
Sometimes shared is fastest", &members, + "http://anglebug.com/7544"}; + + FeatureInfo preferCpuForBuffersubdata = { + "preferCpuForBuffersubdata", FeatureCategory::MetalFeatures, + "Makes bufferSubData always update via CPU", &members, "http://anglebug.com/7544"}; }; inline FeaturesMtl::FeaturesMtl() = default; diff --git a/include/platform/mtl_features.json b/include/platform/mtl_features.json index ccd383ece..c928f8c8e 100644 --- a/include/platform/mtl_features.json +++ b/include/platform/mtl_features.json @@ -243,6 +243,46 @@ "When uploading data to IOSurface-backed textures, use a staging buffer." ], "issue": "http://anglebug.com/7573" + }, + { + "name": "always_use_staged_buffer_updates", + "category": "Features", + "description": [ + "Always update buffers by copying the data to a staging buffer and then blitting it to the actual buffer" + ], + "issue": "http://anglebug.com/7544" + }, + { + "name": "use_shadow_buffers_when_appropriate", + "category": "Features", + "description": [ + "On some architectures using a shadow buffer can be faster for certain size buffers" + ], + "issue": "http://anglebug.com/7544" + }, + { + "name": "always_use_managed_storage_mode_for_buffers", + "category": "Features", + "description": [ + "Metal buffers can be managed, shared, or private. Sometimes managed is fastest" + ], + "issue": "http://anglebug.com/7544" + }, + { + "name": "always_use_shared_storage_mode_for_buffers", + "category": "Features", + "description": [ + "Metal buffers can be managed, shared, or private. 
Sometimes shared is fastest" + ], + "issue": "http://anglebug.com/7544" + }, + { + "name": "prefer_cpu_for_buffersubdata", + "category": "Features", + "description": [ + "Makes bufferSubData always update via CPU" + ], + "issue": "http://anglebug.com/7544" } ] } diff --git a/scripts/code_generation_hashes/ANGLE_features.json b/scripts/code_generation_hashes/ANGLE_features.json index 66d00a054..5da2beace 100644 --- a/scripts/code_generation_hashes/ANGLE_features.json +++ b/scripts/code_generation_hashes/ANGLE_features.json @@ -4,7 +4,7 @@ "include/platform/FeaturesGL_autogen.h": "c192145f3939d4d0bf85a39649e0c14e", "include/platform/FeaturesMtl_autogen.h": - "80c0f3379882d1f67e523a3a1530cd79", + "c31c1c77040ef119dfaf882b5b5e65ab", "include/platform/FeaturesVk_autogen.h": "03f5b51f08b6cb4f831764aa4848f399", "include/platform/FrontendFeatures_autogen.h": @@ -18,11 +18,11 @@ "include/platform/gl_features.json": "a50e9bd2fa9eb0685d9b1c118a21ad2c", "include/platform/mtl_features.json": - "9833c17145ba2223da2e607a9340afda", + "408385ed8fa29652e23a6338faec6d2f", "include/platform/vk_features.json": "a0dd571e23e0bd521eb42d72a0863297", "util/angle_features_autogen.cpp": - "b6a2d2cac7d30b6c08d9398fed38a14c", + "d43086098956bfd4374284a05cfb884d", "util/angle_features_autogen.h": - "1e4b7c6e89ee370d052fa7f0c48c11c6" + "80421f1223abdee293434a2c7f8ff3bc" } \ No newline at end of file diff --git a/src/libANGLE/renderer/metal/BUILD.gn b/src/libANGLE/renderer/metal/BUILD.gn index 9f3aa91e7..5705faf1e 100644 --- a/src/libANGLE/renderer/metal/BUILD.gn +++ b/src/libANGLE/renderer/metal/BUILD.gn @@ -51,6 +51,8 @@ _metal_backend_sources = [ "TransformFeedbackMtl.mm", "VertexArrayMtl.h", "VertexArrayMtl.mm", + "mtl_buffer_manager.h", + "mtl_buffer_manager.mm", "mtl_buffer_pool.h", "mtl_buffer_pool.mm", "mtl_command_buffer.h", diff --git a/src/libANGLE/renderer/metal/BufferMtl.h b/src/libANGLE/renderer/metal/BufferMtl.h index c20690c62..4313088d8 100644 --- 
a/src/libANGLE/renderer/metal/BufferMtl.h +++ b/src/libANGLE/renderer/metal/BufferMtl.h @@ -151,7 +151,8 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl size_t count, std::pair *outIndices); - const uint8_t *getClientShadowCopyData(ContextMtl *contextMtl); + const uint8_t *getBufferDataReadOnly(ContextMtl *contextMtl); + bool isSafeToReadFromBufferViaBlit(ContextMtl *contextMtl); ConversionBufferMtl *getVertexConversionBuffer(ContextMtl *context, angle::FormatID formatID, @@ -186,27 +187,36 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl size_t size, size_t offset); - angle::Result commitShadowCopy(const gl::Context *context); - angle::Result commitShadowCopy(const gl::Context *context, size_t size); + angle::Result commitShadowCopy(ContextMtl *contextMtl); + angle::Result commitShadowCopy(ContextMtl *contextMtl, size_t size); void markConversionBuffersDirty(); void clearConversionBuffers(); + angle::Result putDataInNewBufferAndStartUsingNewBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset); + angle::Result updateExistingBufferViaBlitFromStagingBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset); + angle::Result copyDataToExistingBufferViaCPU(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset); + angle::Result updateShadowCopyThenCopyShadowToNewBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset); + bool clientShadowCopyDataNeedSync(ContextMtl *contextMtl); void ensureShadowCopySyncedFromGPU(ContextMtl *contextMtl); uint8_t *syncAndObtainShadowCopy(ContextMtl *contextMtl); - // Convenient method - const uint8_t *getClientShadowCopyData(const gl::Context *context) - { - return getClientShadowCopyData(mtl::GetImpl(context)); - } - // Client side shadow buffer + // Optional client side shadow buffer angle::MemoryBuffer mShadowCopy; - // GPU side buffers pool - mtl::BufferPool 
mBufferPool; - // A cache of converted vertex data. std::vector mVertexConversionBuffers; @@ -224,6 +234,9 @@ class BufferMtl : public BufferImpl, public BufferHolderMtl }; std::optional mRestartRangeCache; std::vector mRestartIndices; + size_t mGLSize = 0; // size GL asked for (vs size we actually allocated) + size_t mRevisionCount = 0; // for generating labels only + gl::BufferUsage mUsage; }; class SimpleWeakBufferHolderMtl : public BufferHolderMtl diff --git a/src/libANGLE/renderer/metal/BufferMtl.mm b/src/libANGLE/renderer/metal/BufferMtl.mm index c8769efa3..f72f9432b 100644 --- a/src/libANGLE/renderer/metal/BufferMtl.mm +++ b/src/libANGLE/renderer/metal/BufferMtl.mm @@ -13,6 +13,7 @@ #include "common/utilities.h" #include "libANGLE/renderer/metal/ContextMtl.h" #include "libANGLE/renderer/metal/DisplayMtl.h" +#include "libANGLE/renderer/metal/mtl_buffer_manager.h" namespace rx { @@ -39,6 +40,12 @@ angle::Result GetFirstLastIndices(const IndexType *indices, return angle::Result::Continue; } +bool isOffsetAndSizeMetalBlitCompatible(size_t offset, size_t size) +{ + // Metal requires offset and size to be multiples of 4 + return offset % 4 == 0 && size % 4 == 0; +} + } // namespace // ConversionBufferMtl implementation. 
@@ -88,9 +95,7 @@ VertexConversionBufferMtl::VertexConversionBufferMtl(ContextMtl *context, {} // BufferMtl implementation -BufferMtl::BufferMtl(const gl::BufferState &state) - : BufferImpl(state), mBufferPool(/** alwaysAllocNewBuffer */ true) -{} +BufferMtl::BufferMtl(const gl::BufferState &state) : BufferImpl(state) {} BufferMtl::~BufferMtl() {} @@ -98,8 +103,13 @@ void BufferMtl::destroy(const gl::Context *context) { ContextMtl *contextMtl = mtl::GetImpl(context); mShadowCopy.clear(); - mBufferPool.destroy(contextMtl); - mBuffer = nullptr; + + // if there's a buffer, give it back to the buffer manager + if (mBuffer) + { + contextMtl->getBufferManager().returnBuffer(contextMtl, mBuffer); + mBuffer = nullptr; + } clearConversionBuffers(); } @@ -136,19 +146,30 @@ angle::Result BufferMtl::copySubData(const gl::Context *context, ContextMtl *contextMtl = mtl::GetImpl(context); auto srcMtl = GetAs(source); - if (srcMtl->clientShadowCopyDataNeedSync(contextMtl) || mBuffer->isBeingUsedByGPU(contextMtl)) - { - // If shadow copy requires a synchronization then use blit command instead. - // It might break a pending render pass, but still faster than synchronization with - // GPU. - mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder(); - blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset, - size); + markConversionBuffersDirty(); - return angle::Result::Continue; + if (mShadowCopy.size() > 0) + { + if (srcMtl->clientShadowCopyDataNeedSync(contextMtl) || + mBuffer->isBeingUsedByGPU(contextMtl)) + { + // If shadow copy requires a synchronization then use blit command instead. + // It might break a pending render pass, but still faster than synchronization with + // GPU. 
+ mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder(); + blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset, + size); + + return angle::Result::Continue; + } + return setSubDataImpl(context, srcMtl->getBufferDataReadOnly(contextMtl) + sourceOffset, + size, destOffset); } - return setSubDataImpl(context, srcMtl->getClientShadowCopyData(contextMtl) + sourceOffset, size, - destOffset); + + mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder(); + blitEncoder->copyBuffer(srcMtl->getCurrentBuffer(), sourceOffset, mBuffer, destOffset, size); + + return angle::Result::Continue; } angle::Result BufferMtl::map(const gl::Context *context, GLenum access, void **mapPtr) @@ -176,7 +197,7 @@ angle::Result BufferMtl::mapRange(const gl::Context *context, if (mapPtr) { ContextMtl *contextMtl = mtl::GetImpl(context); - if (mBufferPool.getMaxBuffers() == 1) + if (mShadowCopy.size() == 0) { *mapPtr = mBuffer->mapWithOpt(contextMtl, (access & GL_MAP_WRITE_BIT) == 0, access & GL_MAP_UNSYNCHRONIZED_BIT) + @@ -199,7 +220,7 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result) markConversionBuffersDirty(); - if (mBufferPool.getMaxBuffers() == 1) + if (mShadowCopy.size() == 0) { ASSERT(mBuffer); if (mState.getAccessFlags() & GL_MAP_WRITE_BIT) @@ -215,8 +236,6 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result) } else { - ASSERT(mShadowCopy.size()); - if (mState.getAccessFlags() & GL_MAP_UNSYNCHRONIZED_BIT) { // Copy the mapped region without synchronization with GPU @@ -228,7 +247,7 @@ angle::Result BufferMtl::unmap(const gl::Context *context, GLboolean *result) else { // commit shadow copy data to GPU synchronously - ANGLE_TRY(commitShadowCopy(context)); + ANGLE_TRY(commitShadowCopy(contextMtl)); } } @@ -247,7 +266,7 @@ angle::Result BufferMtl::getIndexRange(const gl::Context *context, bool primitiveRestartEnabled, gl::IndexRange *outRange) { - const uint8_t 
*indices = getClientShadowCopyData(mtl::GetImpl(context)) + offset; + const uint8_t *indices = getBufferDataReadOnly(mtl::GetImpl(context)) + offset; *outRange = gl::ComputeIndexRange(type, indices, count, primitiveRestartEnabled); @@ -260,7 +279,7 @@ angle::Result BufferMtl::getFirstLastIndices(ContextMtl *contextMtl, size_t count, std::pair *outIndices) { - const uint8_t *indices = getClientShadowCopyData(contextMtl) + offset; + const uint8_t *indices = getBufferDataReadOnly(contextMtl) + offset; switch (type) { @@ -283,10 +302,9 @@ void BufferMtl::onDataChanged() markConversionBuffersDirty(); } -/* public */ -const uint8_t *BufferMtl::getClientShadowCopyData(ContextMtl *contextMtl) +const uint8_t *BufferMtl::getBufferDataReadOnly(ContextMtl *contextMtl) { - if (mBufferPool.getMaxBuffers() == 1) + if (mShadowCopy.size() == 0) { // Don't need shadow copy in this case, use the buffer directly return mBuffer->mapReadOnly(contextMtl); @@ -479,13 +497,45 @@ const std::vector BufferMtl::getRestartIndicesFromClientData( return restartIndices; } +namespace +{ + +bool useSharedMemory(ContextMtl *contextMtl, gl::BufferUsage usage) +{ + const angle::FeaturesMtl &features = contextMtl->getDisplay()->getFeatures(); + if (features.alwaysUseManagedStorageModeForBuffers.enabled) + { + return false; + } + + if (features.alwaysUseSharedStorageModeForBuffers.enabled) + { + return true; + } + + switch (usage) + { + case gl::BufferUsage::StaticCopy: + case gl::BufferUsage::StaticDraw: + case gl::BufferUsage::StaticRead: + case gl::BufferUsage::DynamicRead: + case gl::BufferUsage::StreamRead: + return true; + default: + return false; + } +} + +} // namespace + angle::Result BufferMtl::setDataImpl(const gl::Context *context, gl::BufferBinding target, const void *data, size_t intendedSize, gl::BufferUsage usage) { - ContextMtl *contextMtl = mtl::GetImpl(context); + ContextMtl *contextMtl = mtl::GetImpl(context); + const angle::FeaturesMtl &features = 
contextMtl->getDisplay()->getFeatures(); // Invalidate conversion buffers if (mState.getSize() != static_cast(intendedSize)) @@ -497,80 +547,32 @@ angle::Result BufferMtl::setDataImpl(const gl::Context *context, markConversionBuffersDirty(); } + mUsage = usage; + mGLSize = intendedSize; size_t adjustedSize = std::max(1, intendedSize); // Ensures no validation layer issues in std140 with data types like vec3 being 12 bytes vs 16 // in MSL. if (target == gl::BufferBinding::Uniform) { + // This doesn't work! A buffer can be allocated on ARRAY_BUFFER and used in UNIFORM_BUFFER + // TODO(anglebug.com/7585) adjustedSize = roundUpPow2(adjustedSize, (size_t)16); } - size_t maxBuffers; - switch (usage) - { - case gl::BufferUsage::StaticCopy: - case gl::BufferUsage::StaticDraw: - case gl::BufferUsage::StaticRead: - case gl::BufferUsage::DynamicRead: - case gl::BufferUsage::StreamRead: - maxBuffers = 1; // static/read buffer doesn't need high speed data update - mBufferPool.setAlwaysUseGPUMem(); - break; - default: - // dynamic buffer, allow up to 10 update per frame/encoding without - // waiting for GPU. - if (adjustedSize <= mtl::kSharedMemBufferMaxBufSizeHint) - { - maxBuffers = 10; - mBufferPool.setAlwaysUseSharedMem(); - } - else - { - maxBuffers = 1; - mBufferPool.setAlwaysUseGPUMem(); - } - break; - } - // Re-create the buffer - mBuffer = nullptr; - ANGLE_TRY(mBufferPool.reset(contextMtl, adjustedSize, 1, maxBuffers)); - - if (maxBuffers > 1) + mtl::BufferManager &bufferManager = contextMtl->getBufferManager(); + if (mBuffer) { - // We use shadow copy to maintain consistent data between buffers in pool - ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(adjustedSize), GL_OUT_OF_MEMORY); - - if (data) - { - // Transfer data to shadow copy buffer - auto ptr = static_cast(data); - std::copy(ptr, ptr + intendedSize, mShadowCopy.data()); - - // Transfer data from shadow copy buffer to GPU buffer. 
- ANGLE_TRY(commitShadowCopy(context, adjustedSize)); - } - else - { - // This is needed so that first buffer pointer could be available - ANGLE_TRY(commitShadowCopy(context, 0)); - } + // Return the current buffer to the buffer manager + // It will not be re-used until it's no longer in use. + bufferManager.returnBuffer(contextMtl, mBuffer); + mBuffer = nullptr; } - else - { - // We don't need shadow copy if there will be only one buffer in the pool. - ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(0), GL_OUT_OF_MEMORY); - // Allocate one buffer to use - ANGLE_TRY( - mBufferPool.allocate(contextMtl, adjustedSize, nullptr, &mBuffer, nullptr, nullptr)); - - if (data) - { - ANGLE_TRY(setSubDataImpl(context, data, intendedSize, 0)); - } - } + // Get a new buffer + bool useSharedMem = useSharedMemory(contextMtl, usage); + ANGLE_TRY(bufferManager.getBuffer(contextMtl, adjustedSize, useSharedMem, mBuffer)); #ifndef NDEBUG ANGLE_MTL_OBJC_SCOPE @@ -579,9 +581,137 @@ angle::Result BufferMtl::setDataImpl(const gl::Context *context, } #endif + // We may use shadow copy to maintain consistent data between buffers in pool + size_t shadowSize = (!features.preferCpuForBuffersubdata.enabled && + features.useShadowBuffersWhenAppropriate.enabled && + adjustedSize <= mtl::kSharedMemBufferMaxBufSizeHint) + ? 
adjustedSize + : 0; + ANGLE_MTL_CHECK(contextMtl, mShadowCopy.resize(shadowSize), GL_OUT_OF_MEMORY); + + if (data) + { + ANGLE_TRY(setSubDataImpl(context, data, intendedSize, 0)); + } + return angle::Result::Continue; } +// states: +// * The buffer is not in use +// +// safe = true +// +// * The buffer has a pending blit +// +// In this case, as long as we are only reading from it +// via blit to a new buffer our blits will happen after existing +// blits +// +// safe = true +// +// * The buffer has pending writes in a committed render encoder +// +// In this case we're encoding commands that will happen after +// that encoder +// +// safe = true +// +// * The buffer has pending writes in the current render encoder +// +// In this case we have to split/end the render encoder +// before we can use the buffer. +// +// safe = false +bool BufferMtl::isSafeToReadFromBufferViaBlit(ContextMtl *contextMtl) +{ + uint64_t serial = mBuffer->getLastWritingRenderEncoderSerial(); + bool isSameSerial = contextMtl->isCurrentRenderEncoderSerial(serial); + return !isSameSerial; +} + +angle::Result BufferMtl::updateExistingBufferViaBlitFromStagingBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset) +{ + ASSERT(isOffsetAndSizeMetalBlitCompatible(offset, sizeToCopy)); + + mtl::BufferManager &bufferManager = contextMtl->getBufferManager(); + return bufferManager.queueBlitCopyDataToBuffer(contextMtl, srcPtr, sizeToCopy, offset, mBuffer); +} + +// * get a new or unused buffer +// * copy the new data to it +// * copy any old data not overwritten by the new data to the new buffer +// * start using the new buffer +angle::Result BufferMtl::putDataInNewBufferAndStartUsingNewBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset) +{ + ASSERT(isOffsetAndSizeMetalBlitCompatible(offset, sizeToCopy)); + + mtl::BufferManager &bufferManager = contextMtl->getBufferManager(); + mtl::BufferRef oldBuffer = mBuffer; + bool 
useSharedMem = useSharedMemory(contextMtl, mUsage); + + ANGLE_TRY(bufferManager.getBuffer(contextMtl, mGLSize, useSharedMem, mBuffer)); + mBuffer->get().label = [NSString stringWithFormat:@"BufferMtl=%p(%lu)", this, ++mRevisionCount]; + + uint8_t *ptr = mBuffer->mapWithOpt(contextMtl, false, true); + std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset); + mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy); + + if (offset > 0 || offset + sizeToCopy < mGLSize) + { + mtl::BlitCommandEncoder *blitEncoder = + contextMtl->getBlitCommandEncoderWithoutEndingRenderEncoder(); + if (offset > 0) + { + // copy old data before updated region + blitEncoder->copyBuffer(oldBuffer, 0, mBuffer, 0, offset); + } + if (offset + sizeToCopy < mGLSize) + { + // copy old data after updated region + const size_t endOffset = offset + sizeToCopy; + const size_t endSizeToCopy = mGLSize - endOffset; + blitEncoder->copyBuffer(oldBuffer, endOffset, mBuffer, endOffset, endSizeToCopy); + } + } + + bufferManager.returnBuffer(contextMtl, oldBuffer); + return angle::Result::Continue; +} + +angle::Result BufferMtl::copyDataToExistingBufferViaCPU(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset) +{ + uint8_t *ptr = mBuffer->map(contextMtl); + std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset); + mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy); + return angle::Result::Continue; +} + +angle::Result BufferMtl::updateShadowCopyThenCopyShadowToNewBuffer(ContextMtl *contextMtl, + const uint8_t *srcPtr, + size_t sizeToCopy, + size_t offset) +{ + // 1. Before copying data from client, we need to synchronize modified data from GPU to + // shadow copy first. + ensureShadowCopySyncedFromGPU(contextMtl); + + // 2. Copy data from client to shadow copy. + std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset); + + // 3. Copy data from shadow copy to GPU. 
+ return commitShadowCopy(contextMtl); +} + angle::Result BufferMtl::setSubDataImpl(const gl::Context *context, const void *data, size_t size, @@ -594,68 +724,72 @@ angle::Result BufferMtl::setSubDataImpl(const gl::Context *context, ASSERT(mBuffer); - ContextMtl *contextMtl = mtl::GetImpl(context); + ContextMtl *contextMtl = mtl::GetImpl(context); + const angle::FeaturesMtl &features = contextMtl->getDisplay()->getFeatures(); - ANGLE_MTL_TRY(contextMtl, offset <= mBuffer->size()); + ANGLE_MTL_TRY(contextMtl, offset <= mGLSize); auto srcPtr = static_cast(data); - auto sizeToCopy = std::min(size, mBuffer->size() - offset); + auto sizeToCopy = std::min(size, mGLSize - offset); markConversionBuffersDirty(); - if (mBufferPool.getMaxBuffers() == 1) + if (features.preferCpuForBuffersubdata.enabled) { - ASSERT(mBuffer); - uint8_t *ptr = mBuffer->map(contextMtl); - std::copy(srcPtr, srcPtr + sizeToCopy, ptr + offset); - mBuffer->unmapAndFlushSubset(contextMtl, offset, sizeToCopy); + return copyDataToExistingBufferViaCPU(contextMtl, srcPtr, sizeToCopy, offset); + } + + if (mShadowCopy.size() > 0) + { + return updateShadowCopyThenCopyShadowToNewBuffer(contextMtl, srcPtr, sizeToCopy, offset); } else { - ASSERT(mShadowCopy.size()); + bool alwaysUseStagedBufferUpdates = features.alwaysUseStagedBufferUpdates.enabled; - // 1. Before copying data from client, we need to synchronize modified data from GPU to - // shadow copy first. - ensureShadowCopySyncedFromGPU(contextMtl); - - // 2. Copy data from client to shadow copy. - std::copy(srcPtr, srcPtr + sizeToCopy, mShadowCopy.data() + offset); - - // 3. Copy data from shadow copy to GPU. 
- ANGLE_TRY(commitShadowCopy(context)); + if (isOffsetAndSizeMetalBlitCompatible(offset, size) && + (alwaysUseStagedBufferUpdates || mBuffer->isBeingUsedByGPU(contextMtl))) + { + if (alwaysUseStagedBufferUpdates || !isSafeToReadFromBufferViaBlit(contextMtl)) + { + // We can't use the buffer now so copy the data + // to a staging buffer and blit it in + return updateExistingBufferViaBlitFromStagingBuffer(contextMtl, srcPtr, sizeToCopy, + offset); + } + else + { + return putDataInNewBufferAndStartUsingNewBuffer(contextMtl, srcPtr, sizeToCopy, + offset); + } + } + else + { + return copyDataToExistingBufferViaCPU(contextMtl, srcPtr, sizeToCopy, offset); + } } - - return angle::Result::Continue; } -angle::Result BufferMtl::commitShadowCopy(const gl::Context *context) +angle::Result BufferMtl::commitShadowCopy(ContextMtl *contextMtl) { - return commitShadowCopy(context, size()); + return commitShadowCopy(contextMtl, mGLSize); } -angle::Result BufferMtl::commitShadowCopy(const gl::Context *context, size_t size) +angle::Result BufferMtl::commitShadowCopy(ContextMtl *contextMtl, size_t size) { - ContextMtl *contextMtl = mtl::GetImpl(context); + mtl::BufferManager &bufferManager = contextMtl->getBufferManager(); + bool useSharedMem = useSharedMemory(contextMtl, mUsage); - if (!size) - { - // Skip mapping if size to commit is zero. - // zero size is passed to allocate buffer only. 
- ANGLE_TRY(mBufferPool.allocate(contextMtl, mShadowCopy.size(), nullptr, &mBuffer, nullptr, - nullptr)); - } - else - { - uint8_t *ptr = nullptr; - mBufferPool.releaseInFlightBuffers(contextMtl); - ANGLE_TRY( - mBufferPool.allocate(contextMtl, mShadowCopy.size(), &ptr, &mBuffer, nullptr, nullptr)); + bufferManager.returnBuffer(contextMtl, mBuffer); + ANGLE_TRY(bufferManager.getBuffer(contextMtl, mGLSize, useSharedMem, mBuffer)); + if (size) + { + uint8_t *ptr = mBuffer->mapWithOpt(contextMtl, false, true); std::copy(mShadowCopy.data(), mShadowCopy.data() + size, ptr); + mBuffer->unmapAndFlushSubset(contextMtl, 0, size); } - ANGLE_TRY(mBufferPool.commit(contextMtl)); - return angle::Result::Continue; } diff --git a/src/libANGLE/renderer/metal/ContextMtl.h b/src/libANGLE/renderer/metal/ContextMtl.h index 3eb8aab09..2de2da943 100644 --- a/src/libANGLE/renderer/metal/ContextMtl.h +++ b/src/libANGLE/renderer/metal/ContextMtl.h @@ -17,6 +17,7 @@ #include "libANGLE/Context.h" #include "libANGLE/renderer/ContextImpl.h" #include "libANGLE/renderer/metal/ProvokingVertexHelper.h" +#include "libANGLE/renderer/metal/mtl_buffer_manager.h" #include "libANGLE/renderer/metal/mtl_buffer_pool.h" #include "libANGLE/renderer/metal/mtl_command_buffer.h" #include "libANGLE/renderer/metal/mtl_context_device.h" @@ -369,6 +370,7 @@ class ContextMtl : public ContextImpl, public mtl::Context // Will end current command encoder and start new blit command encoder. Unless a blit comamnd // encoder is already started. mtl::BlitCommandEncoder *getBlitCommandEncoder(); + // Will end current command encoder and start new compute command encoder. Unless a compute // command encoder is already started. mtl::ComputeCommandEncoder *getComputeCommandEncoder(); @@ -381,6 +383,8 @@ class ContextMtl : public ContextImpl, public mtl::Context // Get the provoking vertex command encoder. 
mtl::ComputeCommandEncoder *getIndexPreprocessingCommandEncoder(); + bool isCurrentRenderEncoderSerial(uint64_t serial); + const mtl::ContextDevice &getMetalDevice() const { return mContextDevice; } angle::Result copy2DTextureSlice0Level0ToWorkTexture(const mtl::TextureRef &srcTexture); @@ -390,6 +394,7 @@ class ContextMtl : public ContextImpl, public mtl::Context const mtl::MipmapNativeLevel &mipNativeLevel, uint32_t layerIndex); const mtl::BufferRef &getWorkBuffer() const { return mWorkBuffer; } + mtl::BufferManager &getBufferManager() { return mBufferManager; } private: void ensureCommandBufferReady(); @@ -600,6 +605,8 @@ class ContextMtl : public ContextImpl, public mtl::Context MTLCullMode mCullMode; bool mCullAllPolygons = false; + mtl::BufferManager mBufferManager; + // Lineloop and TriFan index buffer mtl::BufferPool mLineLoopIndexBuffer; mtl::BufferPool mLineLoopLastSegmentIndexBuffer; diff --git a/src/libANGLE/renderer/metal/ContextMtl.mm b/src/libANGLE/renderer/metal/ContextMtl.mm index 03396f35c..6f00b611c 100644 --- a/src/libANGLE/renderer/metal/ContextMtl.mm +++ b/src/libANGLE/renderer/metal/ContextMtl.mm @@ -1663,6 +1663,11 @@ void ContextMtl::endRenderEncoding(mtl::RenderCommandEncoder *encoder) disableActiveOcclusionQueryInRenderPass(); } + if (mBlitEncoder.valid()) + { + mBlitEncoder.endEncoding(); + } + encoder->endEncoding(); // Resolve visibility results @@ -1755,6 +1760,16 @@ bool ContextMtl::hasStartedRenderPass(const mtl::RenderPassDesc &desc) mRenderEncoder.renderPassDesc().equalIgnoreLoadStoreOptions(desc); } +bool ContextMtl::isCurrentRenderEncoderSerial(uint64_t serial) +{ + if (!mRenderEncoder.valid()) + { + return false; + } + + return serial == mRenderEncoder.getSerial(); +} + // Get current render encoder mtl::RenderCommandEncoder *ContextMtl::getRenderCommandEncoder() { @@ -1856,6 +1871,11 @@ mtl::RenderCommandEncoder *ContextMtl::getRenderTargetCommandEncoder( mtl::BlitCommandEncoder *ContextMtl::getBlitCommandEncoder() { + if 
(mRenderEncoder.valid() || mComputeEncoder.valid()) + { + endEncoding(true); + } + if (mBlitEncoder.valid()) { return &mBlitEncoder; @@ -1882,6 +1902,11 @@ mtl::BlitCommandEncoder *ContextMtl::getBlitCommandEncoderWithoutEndingRenderEnc mtl::ComputeCommandEncoder *ContextMtl::getComputeCommandEncoder() { + if (mRenderEncoder.valid() || mBlitEncoder.valid()) + { + endEncoding(true); + } + if (mComputeEncoder.valid()) { return &mComputeEncoder; @@ -2725,7 +2750,8 @@ angle::Result ContextMtl::copyTextureSliceLevelToWorkBuffer( // Expand the buffer if it is not big enough. if (!mWorkBuffer || mWorkBuffer->size() < sizeInBytes) { - ANGLE_TRY(mtl::Buffer::MakeBuffer(this, sizeInBytes, nullptr, &mWorkBuffer)); + ANGLE_TRY(mtl::Buffer::MakeBufferWithSharedMemOpt(this, true, sizeInBytes, nullptr, + &mWorkBuffer)); } gl::Rectangle region(0, 0, width, height); diff --git a/src/libANGLE/renderer/metal/DisplayMtl.mm b/src/libANGLE/renderer/metal/DisplayMtl.mm index 68f6b8938..419acdb63 100644 --- a/src/libANGLE/renderer/metal/DisplayMtl.mm +++ b/src/libANGLE/renderer/metal/DisplayMtl.mm @@ -1193,6 +1193,16 @@ void DisplayMtl::initializeFeatures() ANGLE_FEATURE_CONDITION((&mFeatures), preemptivelyStartProvokingVertexCommandBuffer, isAMD()); + ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseStagedBufferUpdates, isAMD()); + ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseManagedStorageModeForBuffers, isAMD()); + + ANGLE_FEATURE_CONDITION((&mFeatures), alwaysUseSharedStorageModeForBuffers, isIntel()); + ANGLE_FEATURE_CONDITION((&mFeatures), useShadowBuffersWhenAppropriate, isIntel()); + + // At least one of these must not be set. 
+ ASSERT(!mFeatures.alwaysUseManagedStorageModeForBuffers.enabled || + !mFeatures.alwaysUseSharedStorageModeForBuffers.enabled); + bool defaultDirectToMetal = true; ANGLE_FEATURE_CONDITION((&mFeatures), directMetalGeneration, defaultDirectToMetal); diff --git a/src/libANGLE/renderer/metal/FrameBufferMtl.mm b/src/libANGLE/renderer/metal/FrameBufferMtl.mm index 3c010fa28..6f5c41915 100644 --- a/src/libANGLE/renderer/metal/FrameBufferMtl.mm +++ b/src/libANGLE/renderer/metal/FrameBufferMtl.mm @@ -1611,6 +1611,7 @@ angle::Result FramebufferMtl::readPixelsImpl(const gl::Context *context, return result; } + if (texture->isBeingUsedByGPU(contextMtl)) { contextMtl->flushCommandBuffer(mtl::WaitUntilFinished); diff --git a/src/libANGLE/renderer/metal/ProgramMtl.mm b/src/libANGLE/renderer/metal/ProgramMtl.mm index 150e858a7..65777edd2 100644 --- a/src/libANGLE/renderer/metal/ProgramMtl.mm +++ b/src/libANGLE/renderer/metal/ProgramMtl.mm @@ -1460,7 +1460,7 @@ angle::Result ProgramMtl::legalizeUniformBufferOffsets( // Has the content of the buffer has changed since last conversion? if (conversion->dirty) { - const uint8_t *srcBytes = bufferMtl->getClientShadowCopyData(context); + const uint8_t *srcBytes = bufferMtl->getBufferDataReadOnly(context); srcBytes += offsetModulo; size_t sizeToCopy = bufferMtl->size() - offsetModulo; size_t bytesToAllocate = roundUp(sizeToCopy, 16u); diff --git a/src/libANGLE/renderer/metal/TextureMtl.mm b/src/libANGLE/renderer/metal/TextureMtl.mm index 7c24916ce..90548ad93 100644 --- a/src/libANGLE/renderer/metal/TextureMtl.mm +++ b/src/libANGLE/renderer/metal/TextureMtl.mm @@ -1817,7 +1817,7 @@ angle::Result TextureMtl::setPerSliceSubImage(const gl::Context *context, { // NOTE(hqle): packed depth & stencil texture cannot copy from buffer directly, needs // to split its depth & stencil data and copy separately. 
- const uint8_t *clientData = unpackBufferMtl->getClientShadowCopyData(contextMtl); + const uint8_t *clientData = unpackBufferMtl->getBufferDataReadOnly(contextMtl); clientData += offset; ANGLE_TRY(UploadTextureContents(context, mFormat.actualAngleFormat(), mtlArea, mtl::kZeroNativeMipLevel, slice, clientData, @@ -1871,7 +1871,7 @@ angle::Result TextureMtl::convertAndSetPerSliceSubImage(const gl::Context *conte mFormat.intendedAngleFormat().isBlock) { // Unsupported format, use CPU path. - const uint8_t *clientData = unpackBufferMtl->getClientShadowCopyData(contextMtl); + const uint8_t *clientData = unpackBufferMtl->getBufferDataReadOnly(contextMtl); clientData += offset; ANGLE_TRY(convertAndSetPerSliceSubImage(context, slice, mtlArea, internalFormat, type, pixelsAngleFormat, pixelsRowPitch, diff --git a/src/libANGLE/renderer/metal/VertexArrayMtl.mm b/src/libANGLE/renderer/metal/VertexArrayMtl.mm index d0c162749..f205dbab5 100644 --- a/src/libANGLE/renderer/metal/VertexArrayMtl.mm +++ b/src/libANGLE/renderer/metal/VertexArrayMtl.mm @@ -881,7 +881,7 @@ angle::Result VertexArrayMtl::convertIndexBuffer(const gl::Context *glContext, { // We shouldn't use GPU to convert when we are in a middle of a render pass. 
ANGLE_TRY(StreamIndexData(contextMtl, &conversion->data, - idxBuffer->getClientShadowCopyData(contextMtl) + offsetModulo, + idxBuffer->getBufferDataReadOnly(contextMtl) + offsetModulo, indexType, indexCount, glState.isPrimitiveRestartEnabled(), &conversion->convertedBuffer, &conversion->convertedOffset)); } @@ -1061,7 +1061,7 @@ angle::Result VertexArrayMtl::convertVertexBufferCPU(ContextMtl *contextMtl, ConversionBufferMtl *conversion) { - const uint8_t *srcBytes = srcBuffer->getClientShadowCopyData(contextMtl); + const uint8_t *srcBytes = srcBuffer->getBufferDataReadOnly(contextMtl); ANGLE_CHECK_GL_ALLOC(contextMtl, srcBytes); VertexConversionBufferMtl *vertexConverison = static_cast(conversion); @@ -1151,4 +1151,4 @@ angle::Result VertexArrayMtl::convertVertexBufferGPU(const gl::Context *glContex return angle::Result::Continue; } -} +} // namespace rx diff --git a/src/libANGLE/renderer/metal/mtl_buffer_manager.h b/src/libANGLE/renderer/metal/mtl_buffer_manager.h new file mode 100644 index 000000000..edcb3a1a6 --- /dev/null +++ b/src/libANGLE/renderer/metal/mtl_buffer_manager.h @@ -0,0 +1,94 @@ +// +// Copyright 2022 The ANGLE Project Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// mtl_buffer_manager.h: +// BufferManager manages buffers across all contexts for a single +// device. +// +#ifndef LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_ +#define LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_ + +#include "common/FixedVector.h" +#include "libANGLE/renderer/metal/mtl_resources.h" + +#include + +namespace rx +{ +class ContextMtl; + +namespace mtl +{ + +// GL buffers are backed by Metal buffers. Which metal +// buffer is backing a particular GL buffer is fluid. 
+// The case being optimized is a loop of something like +// +// for 1..4 +// glBufferSubData +// glDrawXXX +// +// You can't update a buffer in the middle of a render pass +// in metal so instead we'd end up using multiple buffers. +// +// Simple case, the call to `glBufferSubData` updates the +// entire buffer. In this case we'd end up with each call +// to `glBufferSubData` getting a new buffer from this +// BufferManager and copying the new data to it. We'd +// end up submitting this renderpass +// +// draw with buf1 +// draw with buf2 +// draw with buf3 +// draw with buf4 +// +// The GL buffer now references buf4. And buf1, buf2, buf3 and +// buf0 (the buffer that was previously referenced by the GL buffer) +// are all added to the inuse-list +// + +// This macro enables showing the running totals of the various +// buckets of unused buffers. +// #define ANGLE_MTL_TRACK_BUFFER_MEM + +class BufferManager +{ + public: + BufferManager(); + + static constexpr size_t kMaxStagingBufferSize = 1024 * 1024; + static constexpr size_t kMaxSizePowerOf2 = 64; + + angle::Result queueBlitCopyDataToBuffer(ContextMtl *contextMtl, + const void *srcPtr, + size_t sizeToCopy, + size_t offset, + mtl::BufferRef &dstMetalBuffer); + + angle::Result getBuffer(ContextMtl *contextMtl, + size_t size, + bool useSharedMem, + mtl::BufferRef &bufferRef); + void returnBuffer(ContextMtl *contextMtl, mtl::BufferRef &bufferRef); + + private: + typedef std::vector BufferList; + + void freeUnusedBuffers(ContextMtl *contextMtl); + void addBufferRefToFreeLists(mtl::BufferRef &bufferRef); + + BufferList mInUseBuffers; + + angle::FixedVector mFreeBuffers[2]; +#ifdef ANGLE_MTL_TRACK_BUFFER_MEM + angle::FixedVector mAllocations; + size_t mTotalMem = 0; +#endif +}; + +} // namespace mtl +} // namespace rx + +#endif /* LIBANGLE_RENDERER_METAL_MTL_BUFFER_MANAGER_H_ */ diff --git a/src/libANGLE/renderer/metal/mtl_buffer_manager.mm b/src/libANGLE/renderer/metal/mtl_buffer_manager.mm new file mode 100644 index 
000000000..255f425bb --- /dev/null +++ b/src/libANGLE/renderer/metal/mtl_buffer_manager.mm @@ -0,0 +1,202 @@ +// +// Copyright 2022 The ANGLE Project Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// mtl_buffer_manager.mm: +// Implements the class methods for BufferManager. +// + +#include "libANGLE/renderer/metal/mtl_buffer_manager.h" + +#include "libANGLE/renderer/metal/ContextMtl.h" +#include "libANGLE/renderer/metal/DisplayMtl.h" + +namespace rx +{ + +namespace mtl +{ + +namespace +{ + +constexpr size_t Log2(size_t num) +{ + return num <= 1 ? 0 : (1 + Log2(num / 2)); +} + +constexpr size_t Log2Ceil(size_t num) +{ + size_t l = Log2(num); + size_t size = size_t(1) << l; + return num == size ? l : l + 1; +} + +#ifdef ANGLE_MTL_TRACK_BUFFER_MEM +const char *memUnitSuffix(size_t powerOf2) +{ + if (powerOf2 < 10) + { + return "b"; + } + if (powerOf2 < 20) + { + return "k"; + } + if (powerOf2 < 30) + { + return "M"; + } + return "G"; +} + +size_t memUnitValue(size_t powerOf2) +{ + if (powerOf2 < 10) + { + return 1u << powerOf2; + } + if (powerOf2 < 20) + { + return 1u << (powerOf2 - 10); + } + if (powerOf2 < 30) + { + return 1u << (powerOf2 - 20); + } + return 1u << (powerOf2 - 30); +} +#endif // ANGLE_MTL_TRACK_BUFFER_MEM + +int sharedMemToIndex(bool useSharedMem) +{ + return useSharedMem ? 1 : 0; +} + +} // namespace + +BufferManager::BufferManager() +#ifdef ANGLE_MTL_TRACK_BUFFER_MEM + : mAllocations(kMaxSizePowerOf2, 0) +#endif +{} + +void BufferManager::freeUnusedBuffers(ContextMtl *contextMtl) +{ + // Scan for the first buffer still in use. 
+ BufferList::iterator firstInUseIter = + std::find_if(mInUseBuffers.begin(), mInUseBuffers.end(), + [&contextMtl](const auto &ref) { return ref->isBeingUsedByGPU(contextMtl); }); + + // Move unused buffers to the free lists + for (BufferList::iterator it = mInUseBuffers.begin(); it != firstInUseIter; ++it) + { + addBufferRefToFreeLists(*it); + } + mInUseBuffers.erase(mInUseBuffers.begin(), firstInUseIter); +} + +void BufferManager::addBufferRefToFreeLists(mtl::BufferRef &bufferRef) +{ + const size_t bucketNdx = Log2Ceil(bufferRef->size()); + ASSERT(bucketNdx < kMaxSizePowerOf2); + int sharedNdx = sharedMemToIndex(bufferRef->get().storageMode == MTLStorageModeShared); + mFreeBuffers[sharedNdx][bucketNdx].push_back(bufferRef); +} + +void BufferManager::returnBuffer(ContextMtl *contextMtl, BufferRef &bufferRef) +{ + if (bufferRef->isBeingUsedByGPU(contextMtl)) + { + mInUseBuffers.push_back(bufferRef); + } + else + { + addBufferRefToFreeLists(bufferRef); + } +} + +angle::Result BufferManager::getBuffer(ContextMtl *contextMtl, + size_t size, + bool useSharedMem, + BufferRef &bufferRef) +{ + freeUnusedBuffers(contextMtl); + + const size_t bucketNdx = Log2Ceil(size); + const int sharedNdx = sharedMemToIndex(useSharedMem); + BufferList &freeBuffers = mFreeBuffers[sharedNdx][bucketNdx]; + + // If there are free buffers grab one + if (!freeBuffers.empty()) + { + bufferRef = freeBuffers.back(); + freeBuffers.pop_back(); + return angle::Result::Continue; + } + + // Create a new one + mtl::BufferRef newBufferRef; + + size_t allocSize = size_t(1) << bucketNdx; + ASSERT(allocSize >= size); + ANGLE_TRY(mtl::Buffer::MakeBufferWithSharedMemOpt(contextMtl, useSharedMem, allocSize, nullptr, + &newBufferRef)); + +#ifdef ANGLE_MTL_TRACK_BUFFER_MEM + { + mTotalMem += allocSize; + mAllocations[bucketNdx]++; + fprintf(stderr, "totalMem: %zu, ", mTotalMem); + size_t numBuffers = 0; + for (size_t i = 0; i < kMaxSizePowerOf2; ++i) + { + if (mAllocations[i]) + { + numBuffers += mAllocations[i]; +
fprintf(stderr, "%zu%s: %zu, ", memUnitValue(i), memUnitSuffix(i), mAllocations[i]); + } + } + fprintf(stderr, " total: %zu\n", numBuffers); + } +#endif + + bufferRef = newBufferRef; + + return angle::Result::Continue; +} + +angle::Result BufferManager::queueBlitCopyDataToBuffer(ContextMtl *contextMtl, + const void *srcPtr, + size_t sizeToCopy, + size_t offset, + mtl::BufferRef &dstMetalBuffer) +{ + const uint8 *src = reinterpret_cast(srcPtr); + bool useShared = + !contextMtl->getDisplay()->getFeatures().alwaysUseManagedStorageModeForBuffers.enabled; + + for (size_t srcOffset = 0; srcOffset < sizeToCopy; srcOffset += kMaxStagingBufferSize) + { + size_t subSizeToCopy = std::min(kMaxStagingBufferSize, sizeToCopy - srcOffset); + + mtl::BufferRef bufferRef; + ANGLE_TRY(getBuffer(contextMtl, subSizeToCopy, useShared, bufferRef)); + + // copy data to buffer + uint8_t *ptr = bufferRef->mapWithOpt(contextMtl, false, true); + std::copy(src + srcOffset, src + srcOffset + subSizeToCopy, ptr); + bufferRef->unmapAndFlushSubset(contextMtl, 0, subSizeToCopy); + + // queue blit + mtl::BlitCommandEncoder *blitEncoder = contextMtl->getBlitCommandEncoder(); + blitEncoder->copyBuffer(bufferRef, 0, dstMetalBuffer, offset + srcOffset, subSizeToCopy); + + returnBuffer(contextMtl, bufferRef); + } + return angle::Result::Continue; +} + +} // namespace mtl +} // namespace rx diff --git a/src/libANGLE/renderer/metal/mtl_command_buffer.h b/src/libANGLE/renderer/metal/mtl_command_buffer.h index 68a93c2d2..b1f2c5a6a 100644 --- a/src/libANGLE/renderer/metal/mtl_command_buffer.h +++ b/src/libANGLE/renderer/metal/mtl_command_buffer.h @@ -80,6 +80,8 @@ class CommandQueue final : public WrappedObject>, angle::Non AutoObjCPtr> makeMetalCommandBuffer(uint64_t *queueSerialOut); void onCommandBufferCommitted(id buf, uint64_t serial); + uint64_t getNextRenderEncoderSerial(); + private: void onCommandBufferCompleted(id buf, uint64_t serial); using ParentClass = WrappedObject>; @@ -94,6 +96,7 @@ class 
CommandQueue final : public WrappedObject>, angle::Non uint64_t mQueueSerialCounter = 1; std::atomic mCommittedBufferSerial{0}; std::atomic mCompletedBufferSerial{0}; + uint64_t mRenderEncoderCounter = 1; mutable std::mutex mLock; }; @@ -497,6 +500,8 @@ class RenderCommandEncoder final : public CommandEncoder const RenderPassDesc &renderPassDesc() const { return mRenderPassDesc; } bool hasDrawCalls() const { return mHasDrawCalls; } + uint64_t getSerial() const { return mSerial; } + private: // Override CommandEncoder id get() @@ -541,6 +546,7 @@ class RenderCommandEncoder final : public CommandEncoder RenderCommandEncoderStates mStateCache = {}; bool mPipelineStateSet = false; + const uint64_t mSerial = 0; }; class BlitCommandEncoder final : public CommandEncoder diff --git a/src/libANGLE/renderer/metal/mtl_command_buffer.mm b/src/libANGLE/renderer/metal/mtl_command_buffer.mm index 74f2d9ac8..2f226f67f 100644 --- a/src/libANGLE/renderer/metal/mtl_command_buffer.mm +++ b/src/libANGLE/renderer/metal/mtl_command_buffer.mm @@ -11,6 +11,7 @@ #include "libANGLE/renderer/metal/mtl_command_buffer.h" #include +#include #if ANGLE_MTL_SIMULATE_DISCARD_FRAMEBUFFER # include #endif @@ -579,6 +580,11 @@ void CommandQueue::onCommandBufferCompleted(id buf, uint64_t s std::memory_order_relaxed); } +uint64_t CommandQueue::getNextRenderEncoderSerial() +{ + return ++mRenderEncoderCounter; +} + // CommandBuffer implementation CommandBuffer::CommandBuffer(CommandQueue *cmdQueue) : mCmdQueue(*cmdQueue) {} @@ -1065,7 +1071,9 @@ void RenderCommandEncoderStates::reset() // RenderCommandEncoder implemtation RenderCommandEncoder::RenderCommandEncoder(CommandBuffer *cmdBuffer, const OcclusionQueryPool &queryPool) - : CommandEncoder(cmdBuffer, RENDER), mOcclusionQueryPool(queryPool) + : CommandEncoder(cmdBuffer, RENDER), + mOcclusionQueryPool(queryPool), + mSerial(cmdBuffer->cmdQueue().getNextRenderEncoderSerial()) { ANGLE_MTL_OBJC_SCOPE { @@ -1556,6 +1564,7 @@ RenderCommandEncoder 
&RenderCommandEncoder::setBufferForWrite(gl::ShaderType sha return *this; } + buffer->setLastWritingRenderEncoderSerial(mSerial); cmdBuffer().setWriteDependency(buffer); id mtlBuffer = (buffer ? buffer->get() : nil); @@ -2183,10 +2192,14 @@ BlitCommandEncoder &BlitCommandEncoder::synchronizeResource(Buffer *buffer) } #if TARGET_OS_OSX || TARGET_OS_MACCATALYST - // Only MacOS has separated storage for resource on CPU and GPU and needs explicit - // synchronization - cmdBuffer().setReadDependency(buffer); - [get() synchronizeResource:buffer->get()]; + if (buffer->get().storageMode == MTLStorageModeManaged) + { + // Only MacOS has separated storage for resource on CPU and GPU and needs explicit + // synchronization + cmdBuffer().setReadDependency(buffer); + + [get() synchronizeResource:buffer->get()]; + } #endif return *this; } diff --git a/src/libANGLE/renderer/metal/mtl_common.h b/src/libANGLE/renderer/metal/mtl_common.h index 0d6df361a..25d075b47 100644 --- a/src/libANGLE/renderer/metal/mtl_common.h +++ b/src/libANGLE/renderer/metal/mtl_common.h @@ -106,7 +106,7 @@ constexpr uint32_t kMaxShaderXFBs = gl::IMPLEMENTATION_MAX_TRANSFORM_FEEDBACK_SE // The max size of a buffer that will be allocated in shared memory. // NOTE(hqle): This is just a hint. There is no official document on what is the max allowed size // for shared memory. 
-constexpr size_t kSharedMemBufferMaxBufSizeHint = 128 * 1024; +constexpr size_t kSharedMemBufferMaxBufSizeHint = 256 * 1024; constexpr size_t kDefaultAttributeSize = 4 * sizeof(float); diff --git a/src/libANGLE/renderer/metal/mtl_render_utils.mm b/src/libANGLE/renderer/metal/mtl_render_utils.mm index 4b854d589..8fceb8736 100644 --- a/src/libANGLE/renderer/metal/mtl_render_utils.mm +++ b/src/libANGLE/renderer/metal/mtl_render_utils.mm @@ -2094,8 +2094,7 @@ angle::Result IndexGeneratorUtils::generateTriFanBufferFromElementsArray( contextMtl->getRenderCommandEncoder())) { IndexGenerationParams cpuPathParams = params; - cpuPathParams.indices = - elementBufferMtl->getClientShadowCopyData(contextMtl) + srcOffset; + cpuPathParams.indices = elementBufferMtl->getBufferDataReadOnly(contextMtl) + srcOffset; return generateTriFanBufferFromElementsArrayCPU(contextMtl, cpuPathParams, indicesGenerated); } @@ -2223,8 +2222,7 @@ angle::Result IndexGeneratorUtils::generateLineLoopBufferFromElementsArray( contextMtl->getRenderCommandEncoder())) { IndexGenerationParams cpuPathParams = params; - cpuPathParams.indices = - elementBufferMtl->getClientShadowCopyData(contextMtl) + srcOffset; + cpuPathParams.indices = elementBufferMtl->getBufferDataReadOnly(contextMtl) + srcOffset; return generateLineLoopBufferFromElementsArrayCPU(contextMtl, cpuPathParams, indicesGenerated); } diff --git a/src/libANGLE/renderer/metal/mtl_resources.h b/src/libANGLE/renderer/metal/mtl_resources.h index afbc985eb..c6ae440f5 100644 --- a/src/libANGLE/renderer/metal/mtl_resources.h +++ b/src/libANGLE/renderer/metal/mtl_resources.h @@ -56,6 +56,7 @@ class Resource : angle::NonCopyable bool hasPendingWorks(Context *context) const; void setUsedByCommandBufferWithQueueSerial(uint64_t serial, bool writing); + void setWrittenToByRenderEncoder(uint64_t serial); uint64_t getCommandBufferQueueSerial() const { return mUsageRef->cmdBufferQueueSerial; } @@ -71,6 +72,15 @@ class Resource : angle::NonCopyable bool 
isCPUReadMemDirty() const { return mUsageRef->cpuReadMemDirty; } void resetCPUReadMemDirty() { mUsageRef->cpuReadMemDirty = false; } + uint64_t getLastWritingRenderEncoderSerial() const + { + return mUsageRef->lastWritingRenderEncoderSerial; + } + void setLastWritingRenderEncoderSerial(uint64_t serial) const + { + mUsageRef->lastWritingRenderEncoderSerial = serial; + } + virtual size_t estimatedByteSize() const = 0; virtual id getID() const = 0; @@ -98,6 +108,9 @@ class Resource : angle::NonCopyable // This flag is useful for BufferMtl to know whether it should update the shadow copy bool cpuReadMemDirty = false; + + // The id of the last render encoder to write to this resource + uint64_t lastWritingRenderEncoderSerial = 0; }; // One resource object might just be a view of another resource. For example, a texture 2d diff --git a/src/libANGLE/renderer/metal/mtl_resources.mm b/src/libANGLE/renderer/metal/mtl_resources.mm index 976eb1c2e..6eff044a7 100644 --- a/src/libANGLE/renderer/metal/mtl_resources.mm +++ b/src/libANGLE/renderer/metal/mtl_resources.mm @@ -1054,8 +1054,9 @@ void Buffer::flush(ContextMtl *context, size_t offsetWritten, size_t sizeWritten { if (get().storageMode == MTLStorageModeManaged) { - size_t startOffset = std::min(offsetWritten, size()); - size_t endOffset = std::min(offsetWritten + sizeWritten, size()); + size_t bufferSize = size(); + size_t startOffset = std::min(offsetWritten, bufferSize); + size_t endOffset = std::min(offsetWritten + sizeWritten, bufferSize); size_t clampedSize = endOffset - startOffset; if (clampedSize > 0) { diff --git a/src/tests/gl_tests/BufferDataTest.cpp b/src/tests/gl_tests/BufferDataTest.cpp index 47bd7eced..92407191b 100644 --- a/src/tests/gl_tests/BufferDataTest.cpp +++ b/src/tests/gl_tests/BufferDataTest.cpp @@ -1055,6 +1055,149 @@ TEST_P(BufferDataTestES3, NoBufferInitDataCopyBug) ASSERT_GL_NO_ERROR(); } +// This is a shortened version of dEQP functional.buffer.copy.basic.array_copy_read.
It provoked +// a bug in copyBufferSubData. The bug appeared to be that conversion buffers were not marked +// as dirty and therefore after copyBufferSubData the next draw call using the buffer that +// just had data copied to it was not re-converted. It's not clear to me how this ever worked +// or why changes to bufferSubData from +// https://chromium-review.googlesource.com/c/angle/angle/+/3842641 made this issue appear and +// why it wasn't already broken. +TEST_P(BufferDataTestES3, CopyBufferSubDataDraw) +{ + const char simpleVertex[] = R"(attribute vec2 position; +attribute vec4 color; +varying vec4 vColor; +void main() +{ + gl_Position = vec4(position, 0, 1); + vColor = color; +} +)"; + const char simpleFragment[] = R"(precision mediump float; +varying vec4 vColor; +void main() +{ + gl_FragColor = vColor; +} +)"; + + ANGLE_GL_PROGRAM(program, simpleVertex, simpleFragment); + glUseProgram(program); + + GLint colorLoc = glGetAttribLocation(program, "color"); + ASSERT_NE(-1, colorLoc); + GLint posLoc = glGetAttribLocation(program, "position"); + ASSERT_NE(-1, posLoc); + + glClearColor(0, 0, 0, 0); + + GLBuffer srcBuffer; // green + GLBuffer dstBuffer; // red + + constexpr size_t numElements = 399; + std::vector reds(numElements, GLColorRGB::red); + std::vector greens(numElements, GLColorRGB::green); + constexpr size_t sizeOfElem = sizeof(decltype(greens)::value_type); + constexpr size_t sizeInBytes = numElements * sizeOfElem; + + glBindBuffer(GL_ARRAY_BUFFER, srcBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeInBytes, greens.data(), GL_STREAM_DRAW); + + glBindBuffer(GL_COPY_READ_BUFFER, dstBuffer); + glBufferData(GL_COPY_READ_BUFFER, sizeInBytes, reds.data(), GL_STREAM_DRAW); + ASSERT_GL_NO_ERROR(); + + constexpr size_t numQuads = numElements / 4; + + // Generate quads that fill clip space to use all the vertex colors + std::vector positions(numQuads * 4 * 2); + for (size_t quad = 0; quad < numQuads; ++quad) + { + size_t offset = quad * 4 * 2; + float x0 = 
float(quad + 0) / numQuads * 2.0f - 1.0f; + float x1 = float(quad + 1) / numQuads * 2.0f - 1.0f; + + /* + 2--3 + | | + 0--1 + */ + positions[offset + 0] = x0; + positions[offset + 1] = -1; + positions[offset + 2] = x1; + positions[offset + 3] = -1; + positions[offset + 4] = x0; + positions[offset + 5] = 1; + positions[offset + 6] = x1; + positions[offset + 7] = 1; + } + glBindBuffer(GL_ARRAY_BUFFER, 0); + glEnableVertexAttribArray(posLoc); + glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, positions.data()); + ASSERT_GL_NO_ERROR(); + + glBindBuffer(GL_ARRAY_BUFFER, srcBuffer); + glEnableVertexAttribArray(colorLoc); + glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr); + ASSERT_GL_NO_ERROR(); + + glClear(GL_COLOR_BUFFER_BIT); + + std::vector indices(numQuads * 6); + for (size_t quad = 0; quad < numQuads; ++quad) + { + size_t ndx = quad * 4; + size_t offset = quad * 6; + indices[offset + 0] = ndx; + indices[offset + 1] = ndx + 1; + indices[offset + 2] = ndx + 2; + indices[offset + 3] = ndx + 2; + indices[offset + 4] = ndx + 1; + indices[offset + 5] = ndx + 3; + } + GLBuffer indexBuffer; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexBuffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, indices.size() * sizeof(decltype(indices)::value_type), + indices.data(), GL_STATIC_DRAW); + + // Draw with srcBuffer (green) + glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0); + EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green); + ASSERT_GL_NO_ERROR(); + + // Draw with dstBuffer (red) + glBindBuffer(GL_ARRAY_BUFFER, dstBuffer); + glEnableVertexAttribArray(colorLoc); + glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr); + glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0); + EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::red); + ASSERT_GL_NO_ERROR(); + + // Copy src to dst. Yes, we're using GL_COPY_READ_BUFFER as dest because that's what the dEQP + // test was testing. 
+ glBindBuffer(GL_ARRAY_BUFFER, srcBuffer); + glBindBuffer(GL_COPY_READ_BUFFER, dstBuffer); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_READ_BUFFER, 0, 0, sizeInBytes); + ASSERT_GL_NO_ERROR(); + + // Draw with srcBuffer. It should still be green. + glBindBuffer(GL_ARRAY_BUFFER, srcBuffer); + glEnableVertexAttribArray(colorLoc); + glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr); + glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0); + EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green); + ASSERT_GL_NO_ERROR(); + + // Draw with dstBuffer. It should now be green too. + glBindBuffer(GL_ARRAY_BUFFER, dstBuffer); + glEnableVertexAttribArray(colorLoc); + glVertexAttribPointer(colorLoc, 3, GL_UNSIGNED_BYTE, GL_TRUE, 0, nullptr); + glDrawElements(GL_TRIANGLES, numQuads * 6, GL_UNSIGNED_SHORT, 0); + EXPECT_PIXEL_RECT_EQ(0, 0, 16, 16, GLColor::green); + + ASSERT_GL_NO_ERROR(); +} + // Ensures that calling glBufferData on a mapped buffer results in an unmapped buffer TEST_P(BufferDataTestES3, BufferDataUnmap) { diff --git a/util/angle_features_autogen.cpp b/util/angle_features_autogen.cpp index 9e59ca3fe..38591798e 100644 --- a/util/angle_features_autogen.cpp +++ b/util/angle_features_autogen.cpp @@ -38,6 +38,9 @@ constexpr PackedEnumMap kFeatureNames = {{ "allowTranslateUniformBlockToStructuredBuffer"}, {Feature::AlwaysCallUseProgramAfterLink, "alwaysCallUseProgramAfterLink"}, {Feature::AlwaysUnbindFramebufferTexture2D, "alwaysUnbindFramebufferTexture2D"}, + {Feature::AlwaysUseManagedStorageModeForBuffers, "alwaysUseManagedStorageModeForBuffers"}, + {Feature::AlwaysUseSharedStorageModeForBuffers, "alwaysUseSharedStorageModeForBuffers"}, + {Feature::AlwaysUseStagedBufferUpdates, "alwaysUseStagedBufferUpdates"}, {Feature::AsyncCommandQueue, "asyncCommandQueue"}, {Feature::Avoid1BitAlphaTextureFormats, "avoid1BitAlphaTextureFormats"}, {Feature::BindTransformFeedbackBufferBeforeBindBufferRange, @@ -187,6 +190,7 @@ constexpr PackedEnumMap 
kFeatureNames = {{ {Feature::PreemptivelyStartProvokingVertexCommandBuffer, "preemptivelyStartProvokingVertexCommandBuffer"}, {Feature::PreferAggregateBarrierCalls, "preferAggregateBarrierCalls"}, + {Feature::PreferCpuForBuffersubdata, "preferCpuForBuffersubdata"}, {Feature::PreferCPUForBufferSubData, "preferCPUForBufferSubData"}, {Feature::PreferDeviceLocalMemoryHostVisible, "preferDeviceLocalMemoryHostVisible"}, {Feature::PreferDrawClearOverVkCmdClearAttachments, "preferDrawClearOverVkCmdClearAttachments"}, @@ -317,6 +321,7 @@ constexpr PackedEnumMap kFeatureNames = {{ {Feature::UseInstancedPointSpriteEmulation, "useInstancedPointSpriteEmulation"}, {Feature::UseMultipleDescriptorsForExternalFormats, "useMultipleDescriptorsForExternalFormats"}, {Feature::UseNonZeroStencilWriteMaskStaticState, "useNonZeroStencilWriteMaskStaticState"}, + {Feature::UseShadowBuffersWhenAppropriate, "useShadowBuffersWhenAppropriate"}, {Feature::UseSystemMemoryForConstantBuffers, "useSystemMemoryForConstantBuffers"}, {Feature::UseUnusedBlocksWithStandardOrSharedLayout, "useUnusedBlocksWithStandardOrSharedLayout"}, diff --git a/util/angle_features_autogen.h b/util/angle_features_autogen.h index 99ce6b541..7c2eac619 100644 --- a/util/angle_features_autogen.h +++ b/util/angle_features_autogen.h @@ -37,6 +37,9 @@ enum class Feature AllowTranslateUniformBlockToStructuredBuffer, AlwaysCallUseProgramAfterLink, AlwaysUnbindFramebufferTexture2D, + AlwaysUseManagedStorageModeForBuffers, + AlwaysUseSharedStorageModeForBuffers, + AlwaysUseStagedBufferUpdates, AsyncCommandQueue, Avoid1BitAlphaTextureFormats, BindTransformFeedbackBufferBeforeBindBufferRange, @@ -176,6 +179,7 @@ enum class Feature PrecisionSafeDivision, PreemptivelyStartProvokingVertexCommandBuffer, PreferAggregateBarrierCalls, + PreferCpuForBuffersubdata, PreferCPUForBufferSubData, PreferDeviceLocalMemoryHostVisible, PreferDrawClearOverVkCmdClearAttachments, @@ -293,6 +297,7 @@ enum class Feature UseInstancedPointSpriteEmulation, 
UseMultipleDescriptorsForExternalFormats, UseNonZeroStencilWriteMaskStaticState, + UseShadowBuffersWhenAppropriate, UseSystemMemoryForConstantBuffers, UseUnusedBlocksWithStandardOrSharedLayout, VertexIDDoesNotIncludeBaseVertex,