diff --git a/include/GLSLANG/ShaderLang.h b/include/GLSLANG/ShaderLang.h index c26310ac2..0134c59e9 100644 --- a/include/GLSLANG/ShaderLang.h +++ b/include/GLSLANG/ShaderLang.h @@ -26,7 +26,7 @@ // Version number for shader translation API. // It is incremented every time the API changes. -#define ANGLE_SH_VERSION 211 +#define ANGLE_SH_VERSION 212 enum ShShaderSpec { @@ -288,9 +288,14 @@ const ShCompileOptions SH_FORCE_ATOMIC_VALUE_RESOLUTION = UINT64_C(1) << 42; const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43; // Emulate seamful cube map sampling for OpenGL ES2.0. Currently only applies to the Vulkan -// backend, as subgroup operations are used. Once that dependency is broken, could be used with +// backend, as it is done after samplers are moved out of structs. Can likely be made to work on // the other backends as well. +// +// There are two variations of this. One using subgroup operations where available, and another +// that emulates those operations using dFdxFine and dFdyFine. The latter is more universally +// available, but is buggy on Nvidia. const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44; +const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45; // Defines alternate strategies for implementing array index clamping. 
enum ShArrayIndexClampingStrategy diff --git a/src/compiler/translator/TranslatorVulkan.cpp b/src/compiler/translator/TranslatorVulkan.cpp index 3819e5180..fe4560cc2 100644 --- a/src/compiler/translator/TranslatorVulkan.cpp +++ b/src/compiler/translator/TranslatorVulkan.cpp @@ -645,7 +645,7 @@ void TranslatorVulkan::translate(TIntermBlock *root, sink << "#version 450 core\n"; - if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING) + if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP) { sink << "#extension GL_KHR_shader_subgroup_quad : require\n"; } @@ -688,10 +688,12 @@ void TranslatorVulkan::translate(TIntermBlock *root, // Rewrite samplerCubes as sampler2DArrays. This must be done after rewriting struct samplers // as it doesn't expect that. - if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING) + if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING | + SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)) { - RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(), - getShaderType() == GL_FRAGMENT_SHADER); + RewriteCubeMapSamplersAs2DArray( + root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER, + compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP); } if (defaultUniformCount > 0) diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp index bc1a2ae36..92012027d 100644 --- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp +++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.cpp @@ -39,9 +39,56 @@ TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable, return neighbor; } +// Calculate the difference of a value with another invocation in the quad. Used to emulate +// GetValueFromNeighbor where subgroup operations are not present. +// +// See comment in declareCoordTranslationFunction. 
+TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable, + TIntermBlock *body, + TFunction *dFdxyFine, + TIntermTyped *variable, + const TType *variableType) +{ + TIntermTyped *neighborValue = + TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable})); + TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr); + + TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType)); + body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue)); + + return neighbor; +} + +// Used to emulate GetValueFromNeighbor with bool values. +TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable, + TIntermBlock *body, + TFunction *dFdxyFine, + TIntermTyped *gl_HelperInvocation) +{ + const TType *boolType = StaticType::GetBasic<EbtBool>(); + const TType *floatType = StaticType::GetBasic<EbtFloat>(); + + TIntermTyped *gl_HelperInvocationAsFloat = + TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation})); + TIntermSymbol *diffWithNeighbor = + GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType); + + TIntermTyped *isNeighborNonHelperValue = + new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f)); + TIntermSymbol *isNeighborNonHelper = + new TIntermSymbol(CreateTempVariable(symbolTable, boolType)); + body->appendStatement( + CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue)); + + return isNeighborNonHelper; +} + // If this is a helper invocation, retrieve the layer index (cube map face) from another invocation // in the quad that is not a helper. See comment in declareCoordTranslationFunction. 
-void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *body, TIntermTyped *l) +void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, + TIntermBlock *body, + TIntermTyped *l, + bool useSubgroupOps) { TVariable *gl_HelperInvocationVar = new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"), @@ -50,48 +97,97 @@ void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *bo const TType *boolType = StaticType::GetBasic<EbtBool>(); const TType *floatType = StaticType::GetBasic<EbtFloat>(); - TFunction *quadSwapHorizontalBool = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), - SymbolType::AngleInternal, boolType, true); - TFunction *quadSwapHorizontalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), - SymbolType::AngleInternal, floatType, true); - TFunction *quadSwapVerticalBool = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), - SymbolType::AngleInternal, boolType, true); - TFunction *quadSwapVerticalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), - SymbolType::AngleInternal, floatType, true); - TFunction *quadSwapDiagonalFloat = - new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"), - SymbolType::AngleInternal, floatType, true); - quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType)); - quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType)); - quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); - quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); - quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + TIntermSymbol *lH; + TIntermSymbol *lV; + TIntermSymbol *lD; - // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done - // outside `if`s so the non-helper thread is not turned inactive. 
- TIntermSymbol *lH = - GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType); - TIntermSymbol *lV = - GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType); - TIntermSymbol *lD = - GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType); + TIntermTyped *horizontalIsNonHelper; + TIntermTyped *verticalIsNonHelper; - // Get the value of gl_HelperInvocation from the neighbors too. - TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor( - symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType); - TIntermSymbol *verticalIsHelper = GetValueFromNeighbor( - symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType); + if (useSubgroupOps) + { + TFunction *quadSwapHorizontalBool = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), + SymbolType::AngleInternal, boolType, true); + TFunction *quadSwapHorizontalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"), + SymbolType::AngleInternal, floatType, true); + TFunction *quadSwapVerticalBool = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), + SymbolType::AngleInternal, boolType, true); + TFunction *quadSwapVerticalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"), + SymbolType::AngleInternal, floatType, true); + TFunction *quadSwapDiagonalFloat = + new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"), + SymbolType::AngleInternal, floatType, true); - // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not - // enough to test if the neighbor is not a helper, we should also check if it's active. 
- TIntermTyped *horizontalIsNonHelper = - new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr); - TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr); + quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType)); + quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType)); + quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + + // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done + // outside `if`s so the non-helper thread is not turned inactive. + lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType); + lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), + floatType); + lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), + floatType); + + // Get the value of gl_HelperInvocation from the neighbors too. + TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor( + symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType); + TIntermSymbol *verticalIsHelper = GetValueFromNeighbor( + symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType); + + // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's + // not enough to test if the neighbor is not a helper, we should also check if it's active. 
+ horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr); + verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr); + } + else + { + TFunction *dFdxFineBool = new TFunction(symbolTable, ImmutableString("dFdxFine"), + SymbolType::AngleInternal, boolType, true); + TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"), + SymbolType::AngleInternal, floatType, true); + TFunction *dFdyFineBool = new TFunction(symbolTable, ImmutableString("dFdyFine"), + SymbolType::AngleInternal, boolType, true); + TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"), + SymbolType::AngleInternal, floatType, true); + + dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType)); + dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType)); + dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType)); + + // layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer; + TIntermTyped *layerQuadSwapHelperValue = + new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l); + TIntermSymbol *layerQuadSwapHelper = + new TIntermSymbol(CreateTempVariable(symbolTable, floatType)); + body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(), + layerQuadSwapHelperValue)); + + // Get the layer from the horizontal, vertical and diagonal neighbor. These should be done + // outside `if`s so the non-helper thread is not turned inactive. + lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType); + lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(), + floatType); + lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType); + + // Get the value of gl_HelperInvocation from the neighbors too. 
+ // + // Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's + // not enough to test if the neighbor is not a helper, we should also check if it's active. + horizontalIsNonHelper = + IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy()); + verticalIsNonHelper = + IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy()); + } TIntermTyped *lVD = new TIntermTernary(verticalIsNonHelper, lV, lD); TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD); @@ -163,10 +259,13 @@ void TransformZMajor(TIntermBlock *block, class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser { public: - RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader) + RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, + bool isFragmentShader, + bool useSubgroupOps) : TIntermTraverser(true, true, true, symbolTable), mCubeXYZToArrayUVL(nullptr), mIsFragmentShader(isFragmentShader), + mUseSubgroupOps(useSubgroupOps), mCoordTranslationFunctionDecl(nullptr) {} @@ -543,14 +642,15 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser // incorrect and the wrong mip would be selected. // // We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap* - // operations to retrieve the layer from a non-helper invocation. As a result, the UVs - // calculated for the helper invocations correspond to the same face and end up outside the - // [0, 1] range, but result in correct derivatives. Indeed, sampling from any other kind of - // texture using varyings that range from [0, 1] would follow the same behavior (where - // helper invocations generate UVs out of range). + // (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a + // non-helper invocation. 
As a result, the UVs calculated for the helper invocations + // correspond to the same face and end up outside the [0, 1] range, but result in correct + // derivatives. Indeed, sampling from any other kind of texture using varyings that range + // from [0, 1] would follow the same behavior (where helper invocations generate UVs out of + // range). if (mIsFragmentShader) { - GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy()); + GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps); } // layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5 @@ -819,6 +919,7 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser TFunction *mCubeXYZToArrayUVL; bool mIsFragmentShader; + bool mUseSubgroupOps; // Stored to be put before the first function after the pass. TIntermFunctionDefinition *mCoordTranslationFunctionDecl; @@ -828,9 +929,11 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root, TSymbolTable *symbolTable, - bool isFragmentShader) + bool isFragmentShader, + bool useSubgroupOps) { - RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader); + RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader, + useSubgroupOps); root->traverse(&traverser); traverser.updateTree(); diff --git a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h index 0b58b090b..028f5260f 100644 --- a/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h +++ b/src/compiler/translator/tree_ops/RewriteCubeMapSamplersAs2DArray.h @@ -19,7 +19,8 @@ class TSymbolTable; void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root, TSymbolTable *symbolTable, - bool isFragmentShader); + bool isFragmentShader, + bool useSubgroupOps); } // namespace sh #endif // 
COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_ diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp index 919e8932b..dde0d5d9f 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.cpp +++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp @@ -238,6 +238,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk mClearColorMask(kAllColorChannelsMask), mFlipYForCurrentSurface(false), mIsAnyHostVisibleBufferWritten(false), + mEmulateSeamfulCubeMapSampling(false), + mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false), mLastCompletedQueueSerial(renderer->nextSerial()), mCurrentQueueSerial(renderer->nextSerial()), mPoolAllocator(kDefaultPoolAllocatorPageSize, 1), @@ -441,7 +443,8 @@ angle::Result ContextVk::initialize() ANGLE_TRY(synchronizeCpuGpuTime()); } - mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling(); + mEmulateSeamfulCubeMapSampling = + shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps); return angle::Result::Continue; } @@ -2896,9 +2899,10 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc( return desc; } -bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const +bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const { - if (mState.getClientMajorVersion() != 2) + // Only allow seamful cube map sampling in non-webgl ES2. + if (mState.getClientMajorVersion() != 2 || mState.isWebGL()) { return false; } @@ -2908,17 +2912,15 @@ bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const return false; } + // Use subgroup ops where available. 
constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT; const VkSubgroupFeatureFlags deviceSupportedOperations = mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations; - bool hasSeamfulCubeMapSubgroupOperations = - (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) == - kSeamfulCubeMapSubgroupOperations; + *useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) == + kSeamfulCubeMapSubgroupOperations; - // Only enable seamful cube map emulation if the necessary subgroup operations are supported. - // Without them, we cannot remove derivative-related artifacts caused by helper invocations. - return hasSeamfulCubeMapSubgroupOperations; + return true; } } // namespace rx diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h index a6c4c83ee..dc607cd26 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.h +++ b/src/libANGLE/renderer/vulkan/ContextVk.h @@ -313,7 +313,11 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO void updateScissor(const gl::State &glState); - bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; } + bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const + { + *useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps; + return mEmulateSeamfulCubeMapSampling; + } private: // Dirty bits. 
@@ -472,7 +476,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO void waitForSwapchainImageIfNecessary(); - bool shouldEmulateSeamfulCubeMapSampling() const; + bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const; vk::PipelineHelper *mCurrentGraphicsPipeline; vk::PipelineAndSerial *mCurrentComputePipeline; @@ -535,8 +539,10 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO // at the end of the command buffer to make that write available to the host. bool mIsAnyHostVisibleBufferWritten; - // Whether this context should do seamful cube map sampling emulation. + // Whether this context should do seamful cube map sampling emulation, and whether subgroup + // operations should be used. bool mEmulateSeamfulCubeMapSampling; + bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps; struct DriverUniformsDescriptorSet { diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp index 14b5de612..ff8d160e2 100644 --- a/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp +++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.cpp @@ -936,7 +936,7 @@ void GlslangWrapper::GetShaderSource(const gl::ProgramState &programState, angle::Result GlslangWrapper::GetShaderCode(vk::Context *context, const gl::Caps &glCaps, bool enableLineRasterEmulation, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap<std::string> &shaderSources, gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut) { @@ -956,20 +956,18 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context, kVersionDefine, kLineRasterDefine), VK_ERROR_INVALID_SHADER_NV); - return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources, - shaderCodeOut); + return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut); } else { - return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources, - shaderCodeOut); + return 
GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut); } } // static angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context, const gl::Caps &glCaps, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap<std::string> &shaderSources, gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut) { @@ -1005,9 +1003,9 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context, glslang::TShader *shader = shaders[shaderType]; shader->setStringsWithLengths(&shaderString, &shaderLength, 1); shader->setEntryPoint("main"); - if (enableSeamfulCubeMapEmulation) + if (enableSubgroupOps) { - // Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations. + // Enable SPIR-V 1.3 to be able to use subgroup operations. shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3); } diff --git a/src/libANGLE/renderer/vulkan/GlslangWrapper.h b/src/libANGLE/renderer/vulkan/GlslangWrapper.h index d8afe6232..719bde778 100644 --- a/src/libANGLE/renderer/vulkan/GlslangWrapper.h +++ b/src/libANGLE/renderer/vulkan/GlslangWrapper.h @@ -29,14 +29,14 @@ class GlslangWrapper static angle::Result GetShaderCode(vk::Context *context, const gl::Caps &glCaps, bool enableLineRasterEmulation, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap<std::string> &shaderSources, gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut); private: static angle::Result GetShaderCodeImpl(vk::Context *context, const gl::Caps &glCaps, - bool enableSeamfulCubeMapEmulation, + bool enableSubgroupOps, const gl::ShaderMap<std::string> &shaderSources, gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut); }; diff --git a/src/libANGLE/renderer/vulkan/ProgramVk.cpp b/src/libANGLE/renderer/vulkan/ProgramVk.cpp index c1a8474af..d9ee7d972 100644 --- a/src/libANGLE/renderer/vulkan/ProgramVk.cpp +++ b/src/libANGLE/renderer/vulkan/ProgramVk.cpp @@ -304,10 +304,16 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk, { ASSERT(!valid()); + bool 
useSubgroupOpsWithSeamfulCubeMapEmulation = false; + bool emulateSeamfulCubeMapSampling = + contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation); + bool useSubgroupOps = + emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation; + gl::ShaderMap> shaderCodes; - ANGLE_TRY(GlslangWrapper::GetShaderCode( - contextVk, contextVk->getCaps(), enableLineRasterEmulation, - contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes)); + ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(), + enableLineRasterEmulation, useSubgroupOps, + shaderSources, &shaderCodes)); for (const gl::ShaderType shaderType : gl::AllShaderTypes()) { @@ -1437,7 +1443,8 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk) const gl::ActiveTextureArray &activeTextures = contextVk->getActiveTextures(); - bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(); + bool useSubgroupOps = false; + bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps); for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size(); ++textureIndex) diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp index f09b29102..0d60b1e02 100644 --- a/src/libANGLE/renderer/vulkan/RendererVk.cpp +++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp @@ -1279,6 +1279,7 @@ void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames) if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID)) { + // Disabled on AMD/windows due to buggy behavior. 
mFeatures.disallowSeamfulCubeMapEmulation.enabled = true; } diff --git a/src/libANGLE/renderer/vulkan/ShaderVk.cpp b/src/libANGLE/renderer/vulkan/ShaderVk.cpp index 11b3f88c2..cf5967923 100644 --- a/src/libANGLE/renderer/vulkan/ShaderVk.cpp +++ b/src/libANGLE/renderer/vulkan/ShaderVk.cpp @@ -40,9 +40,17 @@ std::shared_ptr ShaderVk::compile(const gl::Context *conte compileOptions |= SH_CLAMP_POINT_SIZE; } - if (contextVk->emulateSeamfulCubeMapSampling()) + bool useSubgroupOps = false; + if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps)) { - compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING; + if (useSubgroupOps) + { + compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP; + } + else + { + compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING; + } } return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options); diff --git a/src/tests/deqp_support/deqp_gles2_test_expectations.txt b/src/tests/deqp_support/deqp_gles2_test_expectations.txt index 275952406..9c37b8b68 100644 --- a/src/tests/deqp_support/deqp_gles2_test_expectations.txt +++ b/src/tests/deqp_support/deqp_gles2_test_expectations.txt @@ -329,25 +329,6 @@ 3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL 3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL -// Seamful cubemap sampling failures on Android (due to missing support subgroupQuad* operations). 
-3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL -3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL - // These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors. 3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL 3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL