From 5a2553a7b64c48569f883de424f1747045bcdc0f Mon Sep 17 00:00:00 2001 From: Shahbaz Youssefi Date: Wed, 7 Aug 2019 14:44:12 -0400 Subject: [PATCH] Vulkan: Emulate subgroup ops in seamful cubemap emulation Where subgroup ops are not available, they are emulated as such: Code with subgroup ops: float lH = subgroupQuadSwapHorizontal(layer); float lV = subgroupQuadSwapVertical(layer); float lD = subgroupQuadSwapDiagonal(layer); bool isHelperH = subgroupQuadSwapHorizontal(gl_HelperInvocation); bool isHelperV = subgroupQuadSwapVertical(gl_HelperInvocation); if (gl_HelperInvocation) { layer = !isHelperH ? lH : !isHelperV ? lV : lD; } Emulated code: float nonHelperLayer = gl_HelperInvocation ? 0.0 : layer; float lH = abs(dFdxFine(nonHelperLayer)); float lV = abs(dFdyFine(nonHelperLayer)); float lD = abs(dFdxFine(lV)); float isHelperDiffH = abs(dFdxFine(float(gl_HelperInvocation))); bool isNonHelperH = isHelperDiffH > 0.5; float isHelperDiffV = abs(dFdyFine(float(gl_HelperInvocation))); bool isNonHelperV = isHelperDiffV > 0.5; if (gl_HelperInvocation) { layer = isNonHelperH ? lH : isNonHelperV ? lV : lD; } Both paths are supported as on nvidia devices the emulated code misbehaves. This change therefore effectively only enables seamful cube map emulation on Android where subgroup operations are not supported. 
Bug: angleproject:3243 Bug: angleproject:3732 Change-Id: I9664d9760756758748183eb121c626f176789f3a Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1742222 Reviewed-by: Shahbaz Youssefi Commit-Queue: Shahbaz Youssefi --- include/GLSLANG/ShaderLang.h | 9 +- src/compiler/translator/TranslatorVulkan.cpp | 10 +- .../RewriteCubeMapSamplersAs2DArray.cpp | 199 +++++++++++++----- .../RewriteCubeMapSamplersAs2DArray.h | 3 +- src/libANGLE/renderer/vulkan/ContextVk.cpp | 20 +- src/libANGLE/renderer/vulkan/ContextVk.h | 12 +- .../renderer/vulkan/GlslangWrapper.cpp | 14 +- src/libANGLE/renderer/vulkan/GlslangWrapper.h | 4 +- src/libANGLE/renderer/vulkan/ProgramVk.cpp | 15 +- src/libANGLE/renderer/vulkan/RendererVk.cpp | 1 + src/libANGLE/renderer/vulkan/ShaderVk.cpp | 12 +- .../deqp_gles2_test_expectations.txt | 19 -- 12 files changed, 216 insertions(+), 102 deletions(-) diff --git a/include/GLSLANG/ShaderLang.h b/include/GLSLANG/ShaderLang.h index c26310ac2..0134c59e9 100644 --- a/include/GLSLANG/ShaderLang.h +++ b/include/GLSLANG/ShaderLang.h @@ -26,7 +26,7 @@ // Version number for shader translation API. // It is incremented every time the API changes. -#define ANGLE_SH_VERSION 211 +#define ANGLE_SH_VERSION 212 enum ShShaderSpec { @@ -288,9 +288,14 @@ const ShCompileOptions SH_FORCE_ATOMIC_VALUE_RESOLUTION = UINT64_C(1) << 42; const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43; // Emulate seamful cube map sampling for OpenGL ES2.0. Currently only applies to the Vulkan -// backend, as subgroup operations are used. Once that dependency is broken, could be used with +// backend, as it is done after samplers are moved out of structs. Can likely be made to work on // the other backends as well. +// +// There are two variations of this: one using subgroup operations where available, and another +// that emulates those operations using dFdxFine and dFdyFine. The latter is more universally +// available, but is buggy on Nvidia. 
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44; +const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45; // Defines alternate strategies for implementing array index clamping. enum ShArrayIndexClampingStrategy diff --git a/src/compiler/translator/TranslatorVulkan.cpp b/src/compiler/translator/TranslatorVulkan.cpp index 3819e5180..fe4560cc2 100644 --- a/src/compiler/translator/TranslatorVulkan.cpp +++ b/src/compiler/translator/TranslatorVulkan.cpp @@ -645,7 +645,7 @@ void TranslatorVulkan::translate(TIntermBlock *root, sink << "#version 450 core\n"; - if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING) + if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP) { sink << "#extension GL_KHR_shader_subgroup_quad : require\n"; } @@ -688,10 +688,12 @@ void TranslatorVulkan::translate(TIntermBlock *root, // Rewrite samplerCubes as sampler2DArrays. This must be done after rewriting struct samplers // as it doesn't expect that. 
- if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING) + if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING | + SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)) { - RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(), - getShaderType() == GL_FRAGMENT_SHADER); + RewriteCubeMapSamplersAs2DArray( + root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER, + compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP); } if (defaultUniformCount > 0) diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp index bc1a2ae36..92012027d 100644 --- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp +++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp @@ -39,9 +39,56 @@ TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable, return neighbor; } +// Calculate the difference of a value with another invocation in the quad. Used to emulate +// GetValueFromNeighbor where subgroup operations are not present. +// +// See comment in declareCoordTranslationFunction. +TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable, + TIntermBlock *body, + TFunction *dFdxyFine, + TIntermTyped *variable, + const TType *variableType) +{ + TIntermTyped *neighborValue = + TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable})); + TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr); + + TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType)); + body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue)); + + return neighbor; +} + +// Used to emulate GetValueFromNeighbor with bool values. 
+TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable, + TIntermBlock *body, + TFunction *dFdxyFine, + TIntermTyped *gl_HelperInvocation) +{ + const TType *boolType = StaticType::GetBasic(); + const TType *floatType = StaticType::GetBasic(); + + TIntermTyped *gl_HelperInvocationAsFloat = + TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation})); + TIntermSymbol *diffWithNeighbor = + GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType); + + TIntermTyped *isNeighborNonHelperValue = + new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f)); + TIntermSymbol *isNeighborNonHelper = + new TIntermSymbol(CreateTempVariable(symbolTable, boolType)); + body->appendStatement( + CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue)); + + return isNeighborNonHelper; +} + // If this is a helper invocation, retrieve the layer index (cube map face) from another invocation // in the quad that is not a helper. See comment in declareCoordTranslationFunction. 
-void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *body, TIntermTyped *l) +void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, + TIntermBlock *body, + TIntermTyped *l, + bool useSubgroupOps) { TVariable *gl_HelperInvocationVar = new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"), @@ -50,48 +97,97 @@ void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *bo const TType *boolType = StaticType::GetBasic(); const TType *floatType = StaticType::GetBasic(); - TFunction *quadSwapHorizontalBool = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), - SymbolType::AngleInternal, boolType, true); - TFunction *quadSwapHorizontalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), - SymbolType::AngleInternal, floatType, true); - TFunction *quadSwapVerticalBool = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), - SymbolType::AngleInternal, boolType, true); - TFunction *quadSwapVerticalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), - SymbolType::AngleInternal, floatType, true); - TFunction *quadSwapDiagonalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"), - SymbolType::AngleInternal, floatType, true); - quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType)); - quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType)); - quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); - quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); - quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + TIntermSymbol *lH; + TIntermSymbol *lV; + TIntermSymbol *lD; - // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done - // outside `if`s so the non-helper thread is not turned inactive. 
- TIntermSymbol *lH = - GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType); - TIntermSymbol *lV = - GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType); - TIntermSymbol *lD = - GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType); + TIntermTyped *horizontalIsNonHelper; + TIntermTyped *verticalIsNonHelper; - // Get the value of gl_HelperInvocation from the neighbors too. - TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor( - symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType); - TIntermSymbol *verticalIsHelper = GetValueFromNeighbor( - symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType); + if (useSubgroupOps) + { + TFunction *quadSwapHorizontalBool = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), + SymbolType::AngleInternal, boolType, true); + TFunction *quadSwapHorizontalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), + SymbolType::AngleInternal, floatType, true); + TFunction *quadSwapVerticalBool = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), + SymbolType::AngleInternal, boolType, true); + TFunction *quadSwapVerticalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), + SymbolType::AngleInternal, floatType, true); + TFunction *quadSwapDiagonalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"), + SymbolType::AngleInternal, floatType, true); - // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not - // enough to test if the neighbor is not a helper, we should also check if it's active. 
- TIntermTyped *horizontalIsNonHelper = - new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr); - TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr); + quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType)); + quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType)); + quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + + // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done + // outside `if`s so the non-helper thread is not turned inactive. + lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType); + lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), + floatType); + lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), + floatType); + + // Get the value of gl_HelperInvocation from the neighbors too. + TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor( + symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType); + TIntermSymbol *verticalIsHelper = GetValueFromNeighbor( + symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType); + + // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's + // not enough to test if the neighbor is not a helper, we should also check if it's active. 
+ horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr); + verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr); + } + else + { + TFunction *dFdxFineBool = new TFunction(symbolTable, ImmutableString("dFdxFine"), + SymbolType::AngleInternal, boolType, true); + TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"), + SymbolType::AngleInternal, floatType, true); + TFunction *dFdyFineBool = new TFunction(symbolTable, ImmutableString("dFdyFine"), + SymbolType::AngleInternal, boolType, true); + TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"), + SymbolType::AngleInternal, floatType, true); + + dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType)); + dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType)); + dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + + // layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer; + TIntermTyped *layerQuadSwapHelperValue = + new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l); + TIntermSymbol *layerQuadSwapHelper = + new TIntermSymbol(CreateTempVariable(symbolTable, floatType)); + body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(), + layerQuadSwapHelperValue)); + + // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done + // outside `if`s so the non-helper thread is not turned inactive. + lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType); + lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(), + floatType); + lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType); + + // Get the value of gl_HelperInvocation from the neighbors too. 
+ // + // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's + // not enough to test if the neighbor is not a helper, we should also check if it's active. + horizontalIsNonHelper = + IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy()); + verticalIsNonHelper = + IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy()); + } TIntermTyped *lVD = new TIntermTernary(verticalIsNonHelper, lV, lD); TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD); @@ -163,10 +259,13 @@ void TransformZMajor(TIntermBlock *block, class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser { public: - RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader) + RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, + bool isFragmentShader, + bool useSubgroupOps) : TIntermTraverser(true, true, true, symbolTable), mCubeXYZToArrayUVL(nullptr), mIsFragmentShader(isFragmentShader), + mUseSubgroupOps(useSubgroupOps), mCoordTranslationFunctionDecl(nullptr) {} @@ -543,14 +642,15 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser // incorrect and the wrong mip would be selected. // // We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap* - // operations to retrieve the layer from a non-helper invocation. As a result, the UVs - // calculated for the helper invocations correspond to the same face and end up outside the - // [0, 1] range, but result in correct derivatives. Indeed, sampling from any other kind of - // texture using varyings that range from [0, 1] would follow the same behavior (where - // helper invocations generate UVs out of range). + // (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a + // non-helper invocation. 
As a result, the UVs calculated for the helper invocations + // correspond to the same face and end up outside the [0, 1] range, but result in correct + // derivatives. Indeed, sampling from any other kind of texture using varyings that range + // from [0, 1] would follow the same behavior (where helper invocations generate UVs out of + // range). if (mIsFragmentShader) { - GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy()); + GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps); } // layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5 @@ -819,6 +919,7 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser TFunction *mCubeXYZToArrayUVL; bool mIsFragmentShader; + bool mUseSubgroupOps; // Stored to be put before the first function after the pass. TIntermFunctionDefinition *mCoordTranslationFunctionDecl; @@ -828,9 +929,11 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root, TSymbolTable *symbolTable, - bool isFragmentShader) + bool isFragmentShader, + bool useSubgroupOps) { - RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader); + RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader, + useSubgroupOps); root->traverse(&traverser); traverser.updateTree(); diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h index 0b58b090b..028f5260f 100644 --- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h +++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h @@ -19,7 +19,8 @@ class TSymbolTable; void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root, TSymbolTable *symbolTable, - bool isFragmentShader); + bool isFragmentShader, + bool useSubgroupOps); } // namespace sh #endif // 
COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_ diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp index 919e8932b..dde0d5d9f 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.cpp +++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp @@ -238,6 +238,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk mClearColorMask(kAllColorChannelsMask), mFlipYForCurrentSurface(false), mIsAnyHostVisibleBufferWritten(false), + mEmulateSeamfulCubeMapSampling(false), + mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false), mLastCompletedQueueSerial(renderer->nextSerial()), mCurrentQueueSerial(renderer->nextSerial()), mPoolAllocator(kDefaultPoolAllocatorPageSize, 1), @@ -441,7 +443,8 @@ angle::Result ContextVk::initialize() ANGLE_TRY(synchronizeCpuGpuTime()); } - mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling(); + mEmulateSeamfulCubeMapSampling = + shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps); return angle::Result::Continue; } @@ -2896,9 +2899,10 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc( return desc; } -bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const +bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const { - if (mState.getClientMajorVersion() != 2) + // Only allow seamful cube map sampling in non-webgl ES2. + if (mState.getClientMajorVersion() != 2 || mState.isWebGL()) { return false; } @@ -2908,17 +2912,15 @@ bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const return false; } + // Use subgroup ops where available. 
constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT; const VkSubgroupFeatureFlags deviceSupportedOperations = mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations; - bool hasSeamfulCubeMapSubgroupOperations = - (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) == - kSeamfulCubeMapSubgroupOperations; + *useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) == + kSeamfulCubeMapSubgroupOperations; - // Only enable seamful cube map emulation if the necessary subgroup operations are supported. - // Without them, we cannot remove derivative-related artifacts caused by helper invocations. - return hasSeamfulCubeMapSubgroupOperations; + return true; } } // namespace rx diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h index a6c4c83ee..dc607cd26 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.h +++ b/src/libANGLE/renderer/vulkan/ContextVk.h @@ -313,7 +313,11 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO void updateScissor(const gl::State &glState); - bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; } + bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const + { + *useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps; + return mEmulateSeamfulCubeMapSampling; + } private: // Dirty bits. 
@@ -472,7 +476,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO void waitForSwapchainImageIfNecessary(); - bool shouldEmulateSeamfulCubeMapSampling() const; + bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const; vk::PipelineHelper *mCurrentGraphicsPipeline; vk::PipelineAndSerial *mCurrentComputePipeline; @@ -535,8 +539,10 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO // at the end of the command buffer to make that write available to the host. bool mIsAnyHostVisibleBufferWritten; - // Whether this context should do seamful cube map sampling emulation. + // Whether this context should do seamful cube map sampling emulation, and whether subgroup + // operations should be used. bool mEmulateSeamfulCubeMapSampling; + bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps; struct DriverUniformsDescriptorSet { diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp index 14b5de612..ff8d160e2 100644 --- a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp +++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp @@ -936,7 +936,7 @@ void GlslangWrapper::GetShaderSource(const gl::ProgramState &programState, angle::Result GlslangWrapper::GetShaderCode(vk::Context *context, const gl::Caps &glCaps, bool enableLineRasterEmulation, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap &shaderSources, gl::ShaderMap> *shaderCodeOut) { @@ -956,20 +956,18 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context, kVersionDefine, kLineRasterDefine), VK_ERROR_INVALID_SHADER_NV); - return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources, - shaderCodeOut); + return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut); } else { - return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources, - shaderCodeOut); + return 
GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut); } } // static angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context, const gl::Caps &glCaps, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap &shaderSources, gl::ShaderMap> *shaderCodeOut) { @@ -1005,9 +1003,9 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context, glslang::TShader *shader = shaders[shaderType]; shader->setStringsWithLengths(&shaderString, &shaderLength, 1); shader->setEntryPoint("main"); - if (enableSeamfulCubeMapEmulation) + if (enableSubgroupOps) { - // Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations. + // Enable SPIR-V 1.3 to be able to use subgroup operations. shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3); } diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.h b/src/libANGLE/renderer/vulkan/GlslangWrapper.h index d8afe6232..719bde778 100644 --- a/src/libANGLE/renderer/vulkan/GlslangWrapper.h +++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.h @@ -29,14 +29,14 @@ class GlslangWrapper static angle::Result GetShaderCode(vk::Context *context, const gl::Caps &glCaps, bool enableLineRasterEmulation, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap &shaderSources, gl::ShaderMap> *shaderCodesOut); private: static angle::Result GetShaderCodeImpl(vk::Context *context, const gl::Caps &glCaps, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap &shaderSources, gl::ShaderMap> *shaderCodesOut); }; diff --git a/src/libANGLE/renderer/vulkan/ProgramVk.cpp b/src/libANGLE/renderer/vulkan/ProgramVk.cpp index c1a8474af..d9ee7d972 100644 --- a/src/libANGLE/renderer/vulkan/ProgramVk.cpp +++ b/src/libANGLE/renderer/vulkan/ProgramVk.cpp @@ -304,10 +304,16 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk, { ASSERT(!valid()); + bool 
useSubgroupOpsWithSeamfulCubeMapEmulation = false; + bool emulateSeamfulCubeMapSampling = + contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation); + bool useSubgroupOps = + emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation; + gl::ShaderMap> shaderCodes; - ANGLE_TRY(GlslangWrapper::GetShaderCode( - contextVk, contextVk->getCaps(), enableLineRasterEmulation, - contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes)); + ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(), + enableLineRasterEmulation, useSubgroupOps, + shaderSources, &shaderCodes)); for (const gl::ShaderType shaderType : gl::AllShaderTypes()) { @@ -1437,7 +1443,8 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk) const gl::ActiveTextureArray &activeTextures = contextVk->getActiveTextures(); - bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(); + bool useSubgroupOps = false; + bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps); for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size(); ++textureIndex) diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp index f09b29102..0d60b1e02 100644 --- a/src/libANGLE/renderer/vulkan/RendererVk.cpp +++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp @@ -1279,6 +1279,7 @@ void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames) if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID)) { + // Disabled on AMD/windows due to buggy behavior. 
mFeatures.disallowSeamfulCubeMapEmulation.enabled = true; } diff --git a/src/libANGLE/renderer/vulkan/ShaderVk.cpp b/src/libANGLE/renderer/vulkan/ShaderVk.cpp index 11b3f88c2..cf5967923 100644 --- a/src/libANGLE/renderer/vulkan/ShaderVk.cpp +++ b/src/libANGLE/renderer/vulkan/ShaderVk.cpp @@ -40,9 +40,17 @@ std::shared_ptr ShaderVk::compile(const gl::Context *conte compileOptions |= SH_CLAMP_POINT_SIZE; } - if (contextVk->emulateSeamfulCubeMapSampling()) + bool useSubgroupOps = false; + if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps)) { - compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING; + if (useSubgroupOps) + { + compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP; + } + else + { + compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING; + } } return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options); diff --git a/src/tests/deqp_support/deqp_gles2_test_expectations.txt b/src/tests/deqp_support/deqp_gles2_test_expectations.txt index 275952406..9c37b8b68 100644 --- a/src/tests/deqp_support/deqp_gles2_test_expectations.txt +++ b/src/tests/deqp_support/deqp_gles2_test_expectations.txt @@ -329,25 +329,6 @@ 3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL 3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL -// Seamful cubemap sampling failures on Android (due to missing support subgroupQuad* operations). 
-3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL - // These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors. 3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL 3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL