Vulkan: Emulate subgroup ops in seamful cubemap emulation

Where subgroup ops are not available, they are emulated as follows:

Code with subgroup ops:

    float lH = subgroupQuadSwapHorizontal(layer);
    float lV = subgroupQuadSwapVertical(layer);
    float lD = subgroupQuadSwapDiagonal(layer);

    bool isHelperH = subgroupQuadSwapHorizontal(gl_HelperInvocation);
    bool isHelperV = subgroupQuadSwapVertical(gl_HelperInvocation);

    if (gl_HelperInvocation)
    {
        layer = !isHelperH ? lH : !isHelperV ? lV : lD;
    }

Emulated code:

    float nonHelperLayer = gl_HelperInvocation ? 0.0 : layer;
    float lH = abs(dFdxFine(nonHelperLayer));
    float lV = abs(dFdyFine(nonHelperLayer));
    float lD = abs(dFdxFine(lV));

    float isHelperDiffH = abs(dFdxFine(float(gl_HelperInvocation)));
    bool isNonHelperH = isHelperDiffH > 0.5;

    float isHelperDiffV = abs(dFdyFine(float(gl_HelperInvocation)));
    bool isNonHelperV = isHelperDiffV > 0.5;

    if (gl_HelperInvocation)
    {
        layer = isNonHelperH ? lH : isNonHelperV ? lV : lD;
    }

Both paths are kept because the emulated code misbehaves on Nvidia
devices.  In effect, this change therefore only newly enables seamful cube
map emulation on Android, where subgroup operations are not supported.

Bug: angleproject:3243
Bug: angleproject:3732
Change-Id: I9664d9760756758748183eb121c626f176789f3a
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1742222
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
This commit is contained in:
Shahbaz Youssefi 2019-08-07 14:44:12 -04:00 коммит произвёл Commit Bot
Родитель 201419404c
Коммит 5a2553a7b6
12 изменённых файлов: 216 добавлений и 102 удалений

Просмотреть файл

@ -26,7 +26,7 @@
// Version number for shader translation API.
// It is incremented every time the API changes.
#define ANGLE_SH_VERSION 211
#define ANGLE_SH_VERSION 212
enum ShShaderSpec
{
@ -288,9 +288,14 @@ const ShCompileOptions SH_FORCE_ATOMIC_VALUE_RESOLUTION = UINT64_C(1) << 42;
const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43;
// Emulate seamful cube map sampling for OpenGL ES2.0. Currently only applies to the Vulkan
// backend, as subgroup operations are used. Once that dependency is broken, could be used with
// backend, as it is done after samplers are moved out of structs. Can likely be made to work on
// the other backends as well.
//
// There are two variations of this: one that uses subgroup operations where available, and
// another that emulates those operations using dFdxFine and dFdyFine. The latter is more
// universally available, but is buggy on Nvidia.
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45;
// Defines alternate strategies for implementing array index clamping.
enum ShArrayIndexClampingStrategy

Просмотреть файл

@ -645,7 +645,7 @@ void TranslatorVulkan::translate(TIntermBlock *root,
sink << "#version 450 core\n";
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)
{
sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
}
@ -688,10 +688,12 @@ void TranslatorVulkan::translate(TIntermBlock *root,
// Rewrite samplerCubes as sampler2DArrays. This must be done after rewriting struct samplers
// as it doesn't expect that.
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING |
SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP))
{
RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(),
getShaderType() == GL_FRAGMENT_SHADER);
RewriteCubeMapSamplersAs2DArray(
root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER,
compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP);
}
if (defaultUniformCount > 0)

Просмотреть файл

@ -39,9 +39,56 @@ TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable,
return neighbor;
}
// Calculate the difference of a value with another invocation in the quad. Used to emulate
// GetValueFromNeighbor where subgroup operations are not present.
//
// Appends the declaration `variableType temp = abs(dFdxyFine(variable));` to |body| and returns a
// symbol referring to that temporary.
//
// - symbolTable: used to create the temporary variable.
// - body: block that receives the generated declaration.
// - dFdxyFine: the derivative function to call (the callers in this file pass dFdxFine for the
//   horizontal neighbor and dFdyFine for the vertical one).
// - variable: the expression to differentiate; it is consumed as the call's only argument.
// - variableType: type given to the temporary that holds the result.
//
// The result is an absolute difference, not the neighbor's value itself. It equals the neighbor's
// value only when this invocation's own value is zero, which is why the layer is first replaced by
// 0.0 on helper invocations before being passed here.
//
// See comment in declareCoordTranslationFunction.
TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable,
TIntermBlock *body,
TFunction *dFdxyFine,
TIntermTyped *variable,
const TType *variableType)
{
// dFdxyFine(variable)
TIntermTyped *neighborValue =
TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable}));
// abs(...): drops the sign of the derivative so only the magnitude of the difference remains.
TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr);
// Store the result in a fresh temporary declared in |body|, and hand back a symbol for it.
TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue));
return neighbor;
}
// Used to emulate GetValueFromNeighbor with bool values.
//
// Appends to |body| the equivalent of:
//
//     float diff = abs(dFdxyFine(float(gl_HelperInvocation)));
//     bool isNeighborNonHelper = diff > 0.5;
//
// and returns a symbol referring to the bool temporary.
//
// The generated test is true whenever this invocation and its neighbor disagree on
// gl_HelperInvocation. It therefore only means "the neighbor is not a helper" when the current
// invocation is itself a helper; the emulated code in the change description only consumes the
// result under `if (gl_HelperInvocation)`.
TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable,
TIntermBlock *body,
TFunction *dFdxyFine,
TIntermTyped *gl_HelperInvocation)
{
const TType *boolType = StaticType::GetBasic<EbtBool>();
const TType *floatType = StaticType::GetBasic<EbtFloat>();
// float(gl_HelperInvocation): derivatives are taken on floats, so convert the bool first.
TIntermTyped *gl_HelperInvocationAsFloat =
TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation}));
// abs(dFdxyFine(float(gl_HelperInvocation))), stored in a float temporary.
TIntermSymbol *diffWithNeighbor =
GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType);
// Compare against 0.5 rather than testing for equality with 1.0.
TIntermTyped *isNeighborNonHelperValue =
new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f));
// Store the comparison in a bool temporary declared in |body|.
TIntermSymbol *isNeighborNonHelper =
new TIntermSymbol(CreateTempVariable(symbolTable, boolType));
body->appendStatement(
CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue));
return isNeighborNonHelper;
}
// If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
// in the quad that is not a helper. See comment in declareCoordTranslationFunction.
void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *body, TIntermTyped *l)
void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable,
TIntermBlock *body,
TIntermTyped *l,
bool useSubgroupOps)
{
TVariable *gl_HelperInvocationVar =
new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
@ -50,48 +97,97 @@ void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *bo
const TType *boolType = StaticType::GetBasic<EbtBool>();
const TType *floatType = StaticType::GetBasic<EbtFloat>();
TFunction *quadSwapHorizontalBool =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
SymbolType::AngleInternal, boolType, true);
TFunction *quadSwapHorizontalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
SymbolType::AngleInternal, floatType, true);
TFunction *quadSwapVerticalBool =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
SymbolType::AngleInternal, boolType, true);
TFunction *quadSwapVerticalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
SymbolType::AngleInternal, floatType, true);
TFunction *quadSwapDiagonalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
SymbolType::AngleInternal, floatType, true);
quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
TIntermSymbol *lH;
TIntermSymbol *lV;
TIntermSymbol *lD;
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
// outside `if`s so the non-helper thread is not turned inactive.
TIntermSymbol *lH =
GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
TIntermSymbol *lV =
GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType);
TIntermSymbol *lD =
GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType);
TIntermTyped *horizontalIsNonHelper;
TIntermTyped *verticalIsNonHelper;
// Get the value of gl_HelperInvocation from the neighbors too.
TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
if (useSubgroupOps)
{
TFunction *quadSwapHorizontalBool =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
SymbolType::AngleInternal, boolType, true);
TFunction *quadSwapHorizontalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
SymbolType::AngleInternal, floatType, true);
TFunction *quadSwapVerticalBool =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
SymbolType::AngleInternal, boolType, true);
TFunction *quadSwapVerticalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
SymbolType::AngleInternal, floatType, true);
TFunction *quadSwapDiagonalFloat =
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
SymbolType::AngleInternal, floatType, true);
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not
// enough to test if the neighbor is not a helper, we should also check if it's active.
TIntermTyped *horizontalIsNonHelper =
new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
// outside `if`s so the non-helper thread is not turned inactive.
lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(),
floatType);
lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(),
floatType);
// Get the value of gl_HelperInvocation from the neighbors too.
TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
// not enough to test if the neighbor is not a helper, we should also check if it's active.
horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
}
else
{
TFunction *dFdxFineBool = new TFunction(symbolTable, ImmutableString("dFdxFine"),
SymbolType::AngleInternal, boolType, true);
TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"),
SymbolType::AngleInternal, floatType, true);
TFunction *dFdyFineBool = new TFunction(symbolTable, ImmutableString("dFdyFine"),
SymbolType::AngleInternal, boolType, true);
TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"),
SymbolType::AngleInternal, floatType, true);
dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
// layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer;
TIntermTyped *layerQuadSwapHelperValue =
new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l);
TIntermSymbol *layerQuadSwapHelper =
new TIntermSymbol(CreateTempVariable(symbolTable, floatType));
body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(),
layerQuadSwapHelperValue));
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
// outside `if`s so the non-helper thread is not turned inactive.
lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType);
lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(),
floatType);
lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType);
// Get the value of gl_HelperInvocation from the neighbors too.
//
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
// not enough to test if the neighbor is not a helper, we should also check if it's active.
horizontalIsNonHelper =
IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy());
verticalIsNonHelper =
IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy());
}
TIntermTyped *lVD = new TIntermTernary(verticalIsNonHelper, lV, lD);
TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
@ -163,10 +259,13 @@ void TransformZMajor(TIntermBlock *block,
class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
{
public:
RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader)
RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable,
bool isFragmentShader,
bool useSubgroupOps)
: TIntermTraverser(true, true, true, symbolTable),
mCubeXYZToArrayUVL(nullptr),
mIsFragmentShader(isFragmentShader),
mUseSubgroupOps(useSubgroupOps),
mCoordTranslationFunctionDecl(nullptr)
{}
@ -543,14 +642,15 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
// incorrect and the wrong mip would be selected.
//
// We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
// operations to retrieve the layer from a non-helper invocation. As a result, the UVs
// calculated for the helper invocations correspond to the same face and end up outside the
// [0, 1] range, but result in correct derivatives. Indeed, sampling from any other kind of
// texture using varyings that range from [0, 1] would follow the same behavior (where
// helper invocations generate UVs out of range).
// (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a
// non-helper invocation. As a result, the UVs calculated for the helper invocations
// correspond to the same face and end up outside the [0, 1] range, but result in correct
// derivatives. Indeed, sampling from any other kind of texture using varyings that range
// from [0, 1] would follow the same behavior (where helper invocations generate UVs out of
// range).
if (mIsFragmentShader)
{
GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy());
GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps);
}
// layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
@ -819,6 +919,7 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
TFunction *mCubeXYZToArrayUVL;
bool mIsFragmentShader;
bool mUseSubgroupOps;
// Stored to be put before the first function after the pass.
TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
@ -828,9 +929,11 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
TSymbolTable *symbolTable,
bool isFragmentShader)
bool isFragmentShader,
bool useSubgroupOps)
{
RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader);
RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader,
useSubgroupOps);
root->traverse(&traverser);
traverser.updateTree();

Просмотреть файл

@ -19,7 +19,8 @@ class TSymbolTable;
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
TSymbolTable *symbolTable,
bool isFragmentShader);
bool isFragmentShader,
bool useSubgroupOps);
} // namespace sh
#endif // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_

Просмотреть файл

@ -238,6 +238,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
mClearColorMask(kAllColorChannelsMask),
mFlipYForCurrentSurface(false),
mIsAnyHostVisibleBufferWritten(false),
mEmulateSeamfulCubeMapSampling(false),
mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false),
mLastCompletedQueueSerial(renderer->nextSerial()),
mCurrentQueueSerial(renderer->nextSerial()),
mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
@ -441,7 +443,8 @@ angle::Result ContextVk::initialize()
ANGLE_TRY(synchronizeCpuGpuTime());
}
mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling();
mEmulateSeamfulCubeMapSampling =
shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps);
return angle::Result::Continue;
}
@ -2896,9 +2899,10 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc(
return desc;
}
bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
{
if (mState.getClientMajorVersion() != 2)
// Only allow seamful cube map sampling in non-webgl ES2.
if (mState.getClientMajorVersion() != 2 || mState.isWebGL())
{
return false;
}
@ -2908,17 +2912,15 @@ bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
return false;
}
// Use subgroup ops where available.
constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT;
const VkSubgroupFeatureFlags deviceSupportedOperations =
mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations;
bool hasSeamfulCubeMapSubgroupOperations =
(deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
kSeamfulCubeMapSubgroupOperations;
*useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
kSeamfulCubeMapSubgroupOperations;
// Only enable seamful cube map emulation if the necessary subgroup operations are supported.
// Without them, we cannot remove derivative-related artifacts caused by helper invocations.
return hasSeamfulCubeMapSubgroupOperations;
return true;
}
} // namespace rx

Просмотреть файл

@ -313,7 +313,11 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
void updateScissor(const gl::State &glState);
bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; }
// Returns whether this context emulates seamful cube map sampling. Also writes to
// |useSubgroupOpsOut| whether the subgroup-operation variant of the emulation should be used.
// |useSubgroupOpsOut| is dereferenced unconditionally and must not be null. The out value is
// written even when the function returns false.
bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
{
*useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
return mEmulateSeamfulCubeMapSampling;
}
private:
// Dirty bits.
@ -472,7 +476,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
void waitForSwapchainImageIfNecessary();
bool shouldEmulateSeamfulCubeMapSampling() const;
bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const;
vk::PipelineHelper *mCurrentGraphicsPipeline;
vk::PipelineAndSerial *mCurrentComputePipeline;
@ -535,8 +539,10 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
// at the end of the command buffer to make that write available to the host.
bool mIsAnyHostVisibleBufferWritten;
// Whether this context should do seamful cube map sampling emulation.
// Whether this context should do seamful cube map sampling emulation, and whether subgroup
// operations should be used.
bool mEmulateSeamfulCubeMapSampling;
bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
struct DriverUniformsDescriptorSet
{

Просмотреть файл

@ -936,7 +936,7 @@ void GlslangWrapper::GetShaderSource(const gl::ProgramState &programState,
angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
const gl::Caps &glCaps,
bool enableLineRasterEmulation,
bool enableSeamfulCubeMapEmulation,
bool enableSubgroupOps,
const gl::ShaderMap<std::string> &shaderSources,
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
{
@ -956,20 +956,18 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
kVersionDefine, kLineRasterDefine),
VK_ERROR_INVALID_SHADER_NV);
return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources,
shaderCodeOut);
return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut);
}
else
{
return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources,
shaderCodeOut);
return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut);
}
}
// static
angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
const gl::Caps &glCaps,
bool enableSeamfulCubeMapEmulation,
bool enableSubgroupOps,
const gl::ShaderMap<std::string> &shaderSources,
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
{
@ -1005,9 +1003,9 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
glslang::TShader *shader = shaders[shaderType];
shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
shader->setEntryPoint("main");
if (enableSeamfulCubeMapEmulation)
if (enableSubgroupOps)
{
// Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations.
// Enable SPIR-V 1.3 to be able to use subgroup operations.
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
}

Просмотреть файл

@ -29,14 +29,14 @@ class GlslangWrapper
static angle::Result GetShaderCode(vk::Context *context,
const gl::Caps &glCaps,
bool enableLineRasterEmulation,
bool enableSeamfulCubeMapEmulation,
bool enableSubgroupOps,
const gl::ShaderMap<std::string> &shaderSources,
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
private:
static angle::Result GetShaderCodeImpl(vk::Context *context,
const gl::Caps &glCaps,
bool enableSeamfulCubeMapEmulation,
bool enableSubgroupOps,
const gl::ShaderMap<std::string> &shaderSources,
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
};

Просмотреть файл

@ -304,10 +304,16 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk,
{
ASSERT(!valid());
bool useSubgroupOpsWithSeamfulCubeMapEmulation = false;
bool emulateSeamfulCubeMapSampling =
contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation);
bool useSubgroupOps =
emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation;
gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
ANGLE_TRY(GlslangWrapper::GetShaderCode(
contextVk, contextVk->getCaps(), enableLineRasterEmulation,
contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes));
ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(),
enableLineRasterEmulation, useSubgroupOps,
shaderSources, &shaderCodes));
for (const gl::ShaderType shaderType : gl::AllShaderTypes())
{
@ -1437,7 +1443,8 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk)
const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
bool useSubgroupOps = false;
bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps);
for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size();
++textureIndex)

Просмотреть файл

@ -1279,6 +1279,7 @@ void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames)
if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID))
{
// Disabled on AMD/windows due to buggy behavior.
mFeatures.disallowSeamfulCubeMapEmulation.enabled = true;
}

Просмотреть файл

@ -40,9 +40,17 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte
compileOptions |= SH_CLAMP_POINT_SIZE;
}
if (contextVk->emulateSeamfulCubeMapSampling())
bool useSubgroupOps = false;
if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps))
{
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
if (useSubgroupOps)
{
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP;
}
else
{
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
}
}
return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options);

Просмотреть файл

@ -329,25 +329,6 @@
3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL
3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL
// Seamful cubemap sampling failures on Android (due to missing support subgroupQuad* operations).
3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
// These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors.
3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL