mirror of https://github.com/AvaloniaUI/angle.git
Vulkan: Emulate subgroup ops in seamful cubemap emulation
Where subgroup ops are not available, they are emulated as follows:

Code with subgroup ops:

    float lH = subgroupQuadSwapHorizontal(layer);
    float lV = subgroupQuadSwapVertical(layer);
    float lD = subgroupQuadSwapDiagonal(layer);
    bool isHelperH = subgroupQuadSwapHorizontal(gl_HelperInvocation);
    bool isHelperV = subgroupQuadSwapVertical(gl_HelperInvocation);
    if (gl_HelperInvocation)
    {
        layer = !isHelperH ? lH : !isHelperV ? lV : lD;
    }

Emulated code:

    float nonHelperLayer = gl_HelperInvocation ? 0.0 : layer;
    float lH = abs(dFdxFine(nonHelperLayer));
    float lV = abs(dFdyFine(nonHelperLayer));
    float lD = abs(dFdxFine(lV));
    float isHelperDiffH = abs(dFdxFine(float(gl_HelperInvocation)));
    bool isNonHelperH = isHelperDiffH > 0.5;
    float isHelperDiffV = abs(dFdyFine(float(gl_HelperInvocation)));
    bool isNonHelperV = isHelperDiffV > 0.5;
    if (gl_HelperInvocation)
    {
        layer = isNonHelperH ? lH : isNonHelperV ? lV : lD;
    }

Both paths are supported because the emulated code misbehaves on Nvidia devices. This change therefore effectively only enables seamful cube map emulation on Android, where subgroup operations are not supported.

Bug: angleproject:3243
Bug: angleproject:3732
Change-Id: I9664d9760756758748183eb121c626f176789f3a
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1742222
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
This commit is contained in:
Parent
201419404c
Commit
5a2553a7b6
|
@ -26,7 +26,7 @@
|
||||||
|
|
||||||
// Version number for shader translation API.
|
// Version number for shader translation API.
|
||||||
// It is incremented every time the API changes.
|
// It is incremented every time the API changes.
|
||||||
#define ANGLE_SH_VERSION 211
|
#define ANGLE_SH_VERSION 212
|
||||||
|
|
||||||
enum ShShaderSpec
|
enum ShShaderSpec
|
||||||
{
|
{
|
||||||
|
@ -288,9 +288,14 @@ const ShCompileOptions SH_FORCE_ATOMIC_VALUE_RESOLUTION = UINT64_C(1) << 42;
|
||||||
const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43;
|
const ShCompileOptions SH_EMULATE_GL_BASE_VERTEX_BASE_INSTANCE = UINT64_C(1) << 43;
|
||||||
|
|
||||||
// Emulate seamful cube map sampling for OpenGL ES2.0. Currently only applies to the Vulkan
|
// Emulate seamful cube map sampling for OpenGL ES2.0. Currently only applies to the Vulkan
|
||||||
// backend, as subgroup operations are used. Once that dependency is broken, could be used with
|
// backend, as it is done after samplers are moved out of structs. Can likely be made to work on
|
||||||
// the other backends as well.
|
// the other backends as well.
|
||||||
|
//
|
||||||
|
// There are two variations of this. One using subgroup operations where available, and another
|
||||||
|
// that emulates those operations using dFdxFine and dFdyFine. The latter is more universally
|
||||||
|
// available, but is buggy on Nvidia.
|
||||||
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
|
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING = UINT64_C(1) << 44;
|
||||||
|
const ShCompileOptions SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP = UINT64_C(1) << 45;
|
||||||
|
|
||||||
// Defines alternate strategies for implementing array index clamping.
|
// Defines alternate strategies for implementing array index clamping.
|
||||||
enum ShArrayIndexClampingStrategy
|
enum ShArrayIndexClampingStrategy
|
||||||
|
|
|
@ -645,7 +645,7 @@ void TranslatorVulkan::translate(TIntermBlock *root,
|
||||||
|
|
||||||
sink << "#version 450 core\n";
|
sink << "#version 450 core\n";
|
||||||
|
|
||||||
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
|
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP)
|
||||||
{
|
{
|
||||||
sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
|
sink << "#extension GL_KHR_shader_subgroup_quad : require\n";
|
||||||
}
|
}
|
||||||
|
@ -688,10 +688,12 @@ void TranslatorVulkan::translate(TIntermBlock *root,
|
||||||
|
|
||||||
// Rewrite samplerCubes as sampler2DArrays. This must be done after rewriting struct samplers
|
// Rewrite samplerCubes as sampler2DArrays. This must be done after rewriting struct samplers
|
||||||
// as it doesn't expect that.
|
// as it doesn't expect that.
|
||||||
if (compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING)
|
if (compileOptions & (SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING |
|
||||||
|
SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP))
|
||||||
{
|
{
|
||||||
RewriteCubeMapSamplersAs2DArray(root, &getSymbolTable(),
|
RewriteCubeMapSamplersAs2DArray(
|
||||||
getShaderType() == GL_FRAGMENT_SHADER);
|
root, &getSymbolTable(), getShaderType() == GL_FRAGMENT_SHADER,
|
||||||
|
compileOptions & SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (defaultUniformCount > 0)
|
if (defaultUniformCount > 0)
|
||||||
|
|
|
@ -39,9 +39,56 @@ TIntermSymbol *GetValueFromNeighbor(TSymbolTable *symbolTable,
|
||||||
return neighbor;
|
return neighbor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate the difference of a value with another invocation in the quad. Used to emulate
|
||||||
|
// GetValueFromNeighbor where subgroup operations are not present.
|
||||||
|
//
|
||||||
|
// See comment in declareCoordTranslationFunction.
|
||||||
|
TIntermSymbol *GetDiffWithNeighbor(TSymbolTable *symbolTable,
|
||||||
|
TIntermBlock *body,
|
||||||
|
TFunction *dFdxyFine,
|
||||||
|
TIntermTyped *variable,
|
||||||
|
const TType *variableType)
|
||||||
|
{
|
||||||
|
TIntermTyped *neighborValue =
|
||||||
|
TIntermAggregate::CreateRawFunctionCall(*dFdxyFine, new TIntermSequence({variable}));
|
||||||
|
TIntermTyped *absNeighborValue = new TIntermUnary(EOpAbs, neighborValue, nullptr);
|
||||||
|
|
||||||
|
TIntermSymbol *neighbor = new TIntermSymbol(CreateTempVariable(symbolTable, variableType));
|
||||||
|
body->appendStatement(CreateTempInitDeclarationNode(&neighbor->variable(), absNeighborValue));
|
||||||
|
|
||||||
|
return neighbor;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to emulate GetValueFromNeighbor with bool values.
|
||||||
|
TIntermSymbol *IsNeighborNonHelper(TSymbolTable *symbolTable,
|
||||||
|
TIntermBlock *body,
|
||||||
|
TFunction *dFdxyFine,
|
||||||
|
TIntermTyped *gl_HelperInvocation)
|
||||||
|
{
|
||||||
|
const TType *boolType = StaticType::GetBasic<EbtBool>();
|
||||||
|
const TType *floatType = StaticType::GetBasic<EbtFloat>();
|
||||||
|
|
||||||
|
TIntermTyped *gl_HelperInvocationAsFloat =
|
||||||
|
TIntermAggregate::CreateConstructor(*floatType, new TIntermSequence({gl_HelperInvocation}));
|
||||||
|
TIntermSymbol *diffWithNeighbor =
|
||||||
|
GetDiffWithNeighbor(symbolTable, body, dFdxyFine, gl_HelperInvocationAsFloat, floatType);
|
||||||
|
|
||||||
|
TIntermTyped *isNeighborNonHelperValue =
|
||||||
|
new TIntermBinary(EOpGreaterThan, diffWithNeighbor, CreateFloatNode(0.5f));
|
||||||
|
TIntermSymbol *isNeighborNonHelper =
|
||||||
|
new TIntermSymbol(CreateTempVariable(symbolTable, boolType));
|
||||||
|
body->appendStatement(
|
||||||
|
CreateTempInitDeclarationNode(&isNeighborNonHelper->variable(), isNeighborNonHelperValue));
|
||||||
|
|
||||||
|
return isNeighborNonHelper;
|
||||||
|
}
|
||||||
|
|
||||||
// If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
|
// If this is a helper invocation, retrieve the layer index (cube map face) from another invocation
|
||||||
// in the quad that is not a helper. See comment in declareCoordTranslationFunction.
|
// in the quad that is not a helper. See comment in declareCoordTranslationFunction.
|
||||||
void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *body, TIntermTyped *l)
|
void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable,
|
||||||
|
TIntermBlock *body,
|
||||||
|
TIntermTyped *l,
|
||||||
|
bool useSubgroupOps)
|
||||||
{
|
{
|
||||||
TVariable *gl_HelperInvocationVar =
|
TVariable *gl_HelperInvocationVar =
|
||||||
new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
|
new TVariable(symbolTable, ImmutableString("gl_HelperInvocation"),
|
||||||
|
@ -50,48 +97,97 @@ void GetLayerFromNonHelperInvocation(TSymbolTable *symbolTable, TIntermBlock *bo
|
||||||
|
|
||||||
const TType *boolType = StaticType::GetBasic<EbtBool>();
|
const TType *boolType = StaticType::GetBasic<EbtBool>();
|
||||||
const TType *floatType = StaticType::GetBasic<EbtFloat>();
|
const TType *floatType = StaticType::GetBasic<EbtFloat>();
|
||||||
TFunction *quadSwapHorizontalBool =
|
|
||||||
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
|
|
||||||
SymbolType::AngleInternal, boolType, true);
|
|
||||||
TFunction *quadSwapHorizontalFloat =
|
|
||||||
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
|
|
||||||
SymbolType::AngleInternal, floatType, true);
|
|
||||||
TFunction *quadSwapVerticalBool =
|
|
||||||
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
|
|
||||||
SymbolType::AngleInternal, boolType, true);
|
|
||||||
TFunction *quadSwapVerticalFloat =
|
|
||||||
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
|
|
||||||
SymbolType::AngleInternal, floatType, true);
|
|
||||||
TFunction *quadSwapDiagonalFloat =
|
|
||||||
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
|
|
||||||
SymbolType::AngleInternal, floatType, true);
|
|
||||||
|
|
||||||
quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
TIntermSymbol *lH;
|
||||||
quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
TIntermSymbol *lV;
|
||||||
quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
TIntermSymbol *lD;
|
||||||
quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
|
||||||
quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
|
||||||
|
|
||||||
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
|
TIntermTyped *horizontalIsNonHelper;
|
||||||
// outside `if`s so the non-helper thread is not turned inactive.
|
TIntermTyped *verticalIsNonHelper;
|
||||||
TIntermSymbol *lH =
|
|
||||||
GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
|
|
||||||
TIntermSymbol *lV =
|
|
||||||
GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(), floatType);
|
|
||||||
TIntermSymbol *lD =
|
|
||||||
GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(), floatType);
|
|
||||||
|
|
||||||
// Get the value of gl_HelperInvocation from the neighbors too.
|
if (useSubgroupOps)
|
||||||
TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
|
{
|
||||||
symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
|
TFunction *quadSwapHorizontalBool =
|
||||||
TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
|
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
|
||||||
symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
|
SymbolType::AngleInternal, boolType, true);
|
||||||
|
TFunction *quadSwapHorizontalFloat =
|
||||||
|
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapHorizontal"),
|
||||||
|
SymbolType::AngleInternal, floatType, true);
|
||||||
|
TFunction *quadSwapVerticalBool =
|
||||||
|
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
|
||||||
|
SymbolType::AngleInternal, boolType, true);
|
||||||
|
TFunction *quadSwapVerticalFloat =
|
||||||
|
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapVertical"),
|
||||||
|
SymbolType::AngleInternal, floatType, true);
|
||||||
|
TFunction *quadSwapDiagonalFloat =
|
||||||
|
new TFunction(symbolTable, ImmutableString("subgroupQuadSwapDiagonal"),
|
||||||
|
SymbolType::AngleInternal, floatType, true);
|
||||||
|
|
||||||
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's not
|
quadSwapHorizontalBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
||||||
// enough to test if the neighbor is not a helper, we should also check if it's active.
|
quadSwapVerticalBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
||||||
TIntermTyped *horizontalIsNonHelper =
|
quadSwapHorizontalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
||||||
new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
|
quadSwapVerticalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
||||||
TIntermTyped *verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
|
quadSwapDiagonalFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
||||||
|
|
||||||
|
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
|
||||||
|
// outside `if`s so the non-helper thread is not turned inactive.
|
||||||
|
lH = GetValueFromNeighbor(symbolTable, body, quadSwapHorizontalFloat, l, floatType);
|
||||||
|
lV = GetValueFromNeighbor(symbolTable, body, quadSwapVerticalFloat, l->deepCopy(),
|
||||||
|
floatType);
|
||||||
|
lD = GetValueFromNeighbor(symbolTable, body, quadSwapDiagonalFloat, l->deepCopy(),
|
||||||
|
floatType);
|
||||||
|
|
||||||
|
// Get the value of gl_HelperInvocation from the neighbors too.
|
||||||
|
TIntermSymbol *horizontalIsHelper = GetValueFromNeighbor(
|
||||||
|
symbolTable, body, quadSwapHorizontalBool, gl_HelperInvocation->deepCopy(), boolType);
|
||||||
|
TIntermSymbol *verticalIsHelper = GetValueFromNeighbor(
|
||||||
|
symbolTable, body, quadSwapVerticalBool, gl_HelperInvocation->deepCopy(), boolType);
|
||||||
|
|
||||||
|
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
|
||||||
|
// not enough to test if the neighbor is not a helper, we should also check if it's active.
|
||||||
|
horizontalIsNonHelper = new TIntermUnary(EOpLogicalNot, horizontalIsHelper, nullptr);
|
||||||
|
verticalIsNonHelper = new TIntermUnary(EOpLogicalNot, verticalIsHelper, nullptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
TFunction *dFdxFineBool = new TFunction(symbolTable, ImmutableString("dFdxFine"),
|
||||||
|
SymbolType::AngleInternal, boolType, true);
|
||||||
|
TFunction *dFdxFineFloat = new TFunction(symbolTable, ImmutableString("dFdxFine"),
|
||||||
|
SymbolType::AngleInternal, floatType, true);
|
||||||
|
TFunction *dFdyFineBool = new TFunction(symbolTable, ImmutableString("dFdyFine"),
|
||||||
|
SymbolType::AngleInternal, boolType, true);
|
||||||
|
TFunction *dFdyFineFloat = new TFunction(symbolTable, ImmutableString("dFdyFine"),
|
||||||
|
SymbolType::AngleInternal, floatType, true);
|
||||||
|
|
||||||
|
dFdxFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
||||||
|
dFdyFineBool->addParameter(CreateTempVariable(symbolTable, boolType));
|
||||||
|
dFdxFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
||||||
|
dFdyFineFloat->addParameter(CreateTempVariable(symbolTable, floatType));
|
||||||
|
|
||||||
|
// layerQuadSwapHelper = gl_HelperInvocation ? 0.0 : layer;
|
||||||
|
TIntermTyped *layerQuadSwapHelperValue =
|
||||||
|
new TIntermTernary(gl_HelperInvocation->deepCopy(), CreateZeroNode(*floatType), l);
|
||||||
|
TIntermSymbol *layerQuadSwapHelper =
|
||||||
|
new TIntermSymbol(CreateTempVariable(symbolTable, floatType));
|
||||||
|
body->appendStatement(CreateTempInitDeclarationNode(&layerQuadSwapHelper->variable(),
|
||||||
|
layerQuadSwapHelperValue));
|
||||||
|
|
||||||
|
// Get the layer from the horizontal, vertical and diagonal neighbor. These should be done
|
||||||
|
// outside `if`s so the non-helper thread is not turned inactive.
|
||||||
|
lH = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, layerQuadSwapHelper, floatType);
|
||||||
|
lV = GetDiffWithNeighbor(symbolTable, body, dFdyFineFloat, layerQuadSwapHelper->deepCopy(),
|
||||||
|
floatType);
|
||||||
|
lD = GetDiffWithNeighbor(symbolTable, body, dFdxFineFloat, lV->deepCopy(), floatType);
|
||||||
|
|
||||||
|
// Get the value of gl_HelperInvocation from the neighbors too.
|
||||||
|
//
|
||||||
|
// Note(syoussefi): if the sampling is done inside an if with a non-uniform condition, it's
|
||||||
|
// not enough to test if the neighbor is not a helper, we should also check if it's active.
|
||||||
|
horizontalIsNonHelper =
|
||||||
|
IsNeighborNonHelper(symbolTable, body, dFdxFineBool, gl_HelperInvocation->deepCopy());
|
||||||
|
verticalIsNonHelper =
|
||||||
|
IsNeighborNonHelper(symbolTable, body, dFdyFineBool, gl_HelperInvocation->deepCopy());
|
||||||
|
}
|
||||||
|
|
||||||
TIntermTyped *lVD = new TIntermTernary(verticalIsNonHelper, lV, lD);
|
TIntermTyped *lVD = new TIntermTernary(verticalIsNonHelper, lV, lD);
|
||||||
TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
|
TIntermTyped *lHVD = new TIntermTernary(horizontalIsNonHelper, lH, lVD);
|
||||||
|
@ -163,10 +259,13 @@ void TransformZMajor(TIntermBlock *block,
|
||||||
class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
|
class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable, bool isFragmentShader)
|
RewriteCubeMapSamplersAs2DArrayTraverser(TSymbolTable *symbolTable,
|
||||||
|
bool isFragmentShader,
|
||||||
|
bool useSubgroupOps)
|
||||||
: TIntermTraverser(true, true, true, symbolTable),
|
: TIntermTraverser(true, true, true, symbolTable),
|
||||||
mCubeXYZToArrayUVL(nullptr),
|
mCubeXYZToArrayUVL(nullptr),
|
||||||
mIsFragmentShader(isFragmentShader),
|
mIsFragmentShader(isFragmentShader),
|
||||||
|
mUseSubgroupOps(useSubgroupOps),
|
||||||
mCoordTranslationFunctionDecl(nullptr)
|
mCoordTranslationFunctionDecl(nullptr)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
@ -543,14 +642,15 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
|
||||||
// incorrect and the wrong mip would be selected.
|
// incorrect and the wrong mip would be selected.
|
||||||
//
|
//
|
||||||
// We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
|
// We therefore use gl_HelperInvocation to identify these invocations and subgroupQuadSwap*
|
||||||
// operations to retrieve the layer from a non-helper invocation. As a result, the UVs
|
// (where available) or dFdx/dFdy (emulating subgroupQuadSwap*) to retrieve the layer from a
|
||||||
// calculated for the helper invocations correspond to the same face and end up outside the
|
// non-helper invocation. As a result, the UVs calculated for the helper invocations
|
||||||
// [0, 1] range, but result in correct derivatives. Indeed, sampling from any other kind of
|
// correspond to the same face and end up outside the [0, 1] range, but result in correct
|
||||||
// texture using varyings that range from [0, 1] would follow the same behavior (where
|
// derivatives. Indeed, sampling from any other kind of texture using varyings that range
|
||||||
// helper invocations generate UVs out of range).
|
// from [0, 1] would follow the same behavior (where helper invocations generate UVs out of
|
||||||
|
// range).
|
||||||
if (mIsFragmentShader)
|
if (mIsFragmentShader)
|
||||||
{
|
{
|
||||||
GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy());
|
GetLayerFromNonHelperInvocation(mSymbolTable, body, l->deepCopy(), mUseSubgroupOps);
|
||||||
}
|
}
|
||||||
|
|
||||||
// layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
|
// layer < 1.5 (covering faces 0 and 1, corresponding to major axis being X) and layer < 3.5
|
||||||
|
@ -819,6 +919,7 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
|
||||||
TFunction *mCubeXYZToArrayUVL;
|
TFunction *mCubeXYZToArrayUVL;
|
||||||
|
|
||||||
bool mIsFragmentShader;
|
bool mIsFragmentShader;
|
||||||
|
bool mUseSubgroupOps;
|
||||||
|
|
||||||
// Stored to be put before the first function after the pass.
|
// Stored to be put before the first function after the pass.
|
||||||
TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
|
TIntermFunctionDefinition *mCoordTranslationFunctionDecl;
|
||||||
|
@ -828,9 +929,11 @@ class RewriteCubeMapSamplersAs2DArrayTraverser : public TIntermTraverser
|
||||||
|
|
||||||
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
|
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
|
||||||
TSymbolTable *symbolTable,
|
TSymbolTable *symbolTable,
|
||||||
bool isFragmentShader)
|
bool isFragmentShader,
|
||||||
|
bool useSubgroupOps)
|
||||||
{
|
{
|
||||||
RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader);
|
RewriteCubeMapSamplersAs2DArrayTraverser traverser(symbolTable, isFragmentShader,
|
||||||
|
useSubgroupOps);
|
||||||
root->traverse(&traverser);
|
root->traverse(&traverser);
|
||||||
traverser.updateTree();
|
traverser.updateTree();
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,8 @@ class TSymbolTable;
|
||||||
|
|
||||||
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
|
void RewriteCubeMapSamplersAs2DArray(TIntermBlock *root,
|
||||||
TSymbolTable *symbolTable,
|
TSymbolTable *symbolTable,
|
||||||
bool isFragmentShader);
|
bool isFragmentShader,
|
||||||
|
bool useSubgroupOps);
|
||||||
} // namespace sh
|
} // namespace sh
|
||||||
|
|
||||||
#endif // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
|
#endif // COMPILER_TRANSLATOR_TREEOPS_REWRITECUBEMAPSAMPLERSAS2DARRAY_H_
|
||||||
|
|
|
@ -238,6 +238,8 @@ ContextVk::ContextVk(const gl::State &state, gl::ErrorSet *errorSet, RendererVk
|
||||||
mClearColorMask(kAllColorChannelsMask),
|
mClearColorMask(kAllColorChannelsMask),
|
||||||
mFlipYForCurrentSurface(false),
|
mFlipYForCurrentSurface(false),
|
||||||
mIsAnyHostVisibleBufferWritten(false),
|
mIsAnyHostVisibleBufferWritten(false),
|
||||||
|
mEmulateSeamfulCubeMapSampling(false),
|
||||||
|
mEmulateSeamfulCubeMapSamplingWithSubgroupOps(false),
|
||||||
mLastCompletedQueueSerial(renderer->nextSerial()),
|
mLastCompletedQueueSerial(renderer->nextSerial()),
|
||||||
mCurrentQueueSerial(renderer->nextSerial()),
|
mCurrentQueueSerial(renderer->nextSerial()),
|
||||||
mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
|
mPoolAllocator(kDefaultPoolAllocatorPageSize, 1),
|
||||||
|
@ -441,7 +443,8 @@ angle::Result ContextVk::initialize()
|
||||||
ANGLE_TRY(synchronizeCpuGpuTime());
|
ANGLE_TRY(synchronizeCpuGpuTime());
|
||||||
}
|
}
|
||||||
|
|
||||||
mEmulateSeamfulCubeMapSampling = shouldEmulateSeamfulCubeMapSampling();
|
mEmulateSeamfulCubeMapSampling =
|
||||||
|
shouldEmulateSeamfulCubeMapSampling(&mEmulateSeamfulCubeMapSamplingWithSubgroupOps);
|
||||||
|
|
||||||
return angle::Result::Continue;
|
return angle::Result::Continue;
|
||||||
}
|
}
|
||||||
|
@ -2896,9 +2899,10 @@ vk::DescriptorSetLayoutDesc ContextVk::getDriverUniformsDescriptorSetDesc(
|
||||||
return desc;
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
|
bool ContextVk::shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
|
||||||
{
|
{
|
||||||
if (mState.getClientMajorVersion() != 2)
|
// Only allow seamful cube map sampling emulation in non-WebGL ES2 contexts.
|
||||||
|
if (mState.getClientMajorVersion() != 2 || mState.isWebGL())
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -2908,17 +2912,15 @@ bool ContextVk::shouldEmulateSeamfulCubeMapSampling() const
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use subgroup ops where available.
|
||||||
constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
|
constexpr VkSubgroupFeatureFlags kSeamfulCubeMapSubgroupOperations =
|
||||||
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
|
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
|
||||||
VK_SUBGROUP_FEATURE_QUAD_BIT;
|
VK_SUBGROUP_FEATURE_QUAD_BIT;
|
||||||
const VkSubgroupFeatureFlags deviceSupportedOperations =
|
const VkSubgroupFeatureFlags deviceSupportedOperations =
|
||||||
mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations;
|
mRenderer->getPhysicalDeviceSubgroupProperties().supportedOperations;
|
||||||
bool hasSeamfulCubeMapSubgroupOperations =
|
*useSubgroupOpsOut = (deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
|
||||||
(deviceSupportedOperations & kSeamfulCubeMapSubgroupOperations) ==
|
kSeamfulCubeMapSubgroupOperations;
|
||||||
kSeamfulCubeMapSubgroupOperations;
|
|
||||||
|
|
||||||
// Only enable seamful cube map emulation if the necessary subgroup operations are supported.
|
return true;
|
||||||
// Without them, we cannot remove derivative-related artifacts caused by helper invocations.
|
|
||||||
return hasSeamfulCubeMapSubgroupOperations;
|
|
||||||
}
|
}
|
||||||
} // namespace rx
|
} // namespace rx
|
||||||
|
|
|
@ -313,7 +313,11 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
|
||||||
|
|
||||||
void updateScissor(const gl::State &glState);
|
void updateScissor(const gl::State &glState);
|
||||||
|
|
||||||
bool emulateSeamfulCubeMapSampling() const { return mEmulateSeamfulCubeMapSampling; }
|
bool emulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const
|
||||||
|
{
|
||||||
|
*useSubgroupOpsOut = mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
|
||||||
|
return mEmulateSeamfulCubeMapSampling;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Dirty bits.
|
// Dirty bits.
|
||||||
|
@ -472,7 +476,7 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
|
||||||
|
|
||||||
void waitForSwapchainImageIfNecessary();
|
void waitForSwapchainImageIfNecessary();
|
||||||
|
|
||||||
bool shouldEmulateSeamfulCubeMapSampling() const;
|
bool shouldEmulateSeamfulCubeMapSampling(bool *useSubgroupOpsOut) const;
|
||||||
|
|
||||||
vk::PipelineHelper *mCurrentGraphicsPipeline;
|
vk::PipelineHelper *mCurrentGraphicsPipeline;
|
||||||
vk::PipelineAndSerial *mCurrentComputePipeline;
|
vk::PipelineAndSerial *mCurrentComputePipeline;
|
||||||
|
@ -535,8 +539,10 @@ class ContextVk : public ContextImpl, public vk::Context, public vk::RenderPassO
|
||||||
// at the end of the command buffer to make that write available to the host.
|
// at the end of the command buffer to make that write available to the host.
|
||||||
bool mIsAnyHostVisibleBufferWritten;
|
bool mIsAnyHostVisibleBufferWritten;
|
||||||
|
|
||||||
// Whether this context should do seamful cube map sampling emulation.
|
// Whether this context should do seamful cube map sampling emulation, and whether subgroup
|
||||||
|
// operations should be used.
|
||||||
bool mEmulateSeamfulCubeMapSampling;
|
bool mEmulateSeamfulCubeMapSampling;
|
||||||
|
bool mEmulateSeamfulCubeMapSamplingWithSubgroupOps;
|
||||||
|
|
||||||
struct DriverUniformsDescriptorSet
|
struct DriverUniformsDescriptorSet
|
||||||
{
|
{
|
||||||
|
|
|
@ -936,7 +936,7 @@ void GlslangWrapper::GetShaderSource(const gl::ProgramState &programState,
|
||||||
angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
|
angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
|
||||||
const gl::Caps &glCaps,
|
const gl::Caps &glCaps,
|
||||||
bool enableLineRasterEmulation,
|
bool enableLineRasterEmulation,
|
||||||
bool enableSeamfulCubeMapEmulation,
|
bool enableSubgroupOps,
|
||||||
const gl::ShaderMap<std::string> &shaderSources,
|
const gl::ShaderMap<std::string> &shaderSources,
|
||||||
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
|
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
|
||||||
{
|
{
|
||||||
|
@ -956,20 +956,18 @@ angle::Result GlslangWrapper::GetShaderCode(vk::Context *context,
|
||||||
kVersionDefine, kLineRasterDefine),
|
kVersionDefine, kLineRasterDefine),
|
||||||
VK_ERROR_INVALID_SHADER_NV);
|
VK_ERROR_INVALID_SHADER_NV);
|
||||||
|
|
||||||
return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, patchedSources,
|
return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, patchedSources, shaderCodeOut);
|
||||||
shaderCodeOut);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return GetShaderCodeImpl(context, glCaps, enableSeamfulCubeMapEmulation, shaderSources,
|
return GetShaderCodeImpl(context, glCaps, enableSubgroupOps, shaderSources, shaderCodeOut);
|
||||||
shaderCodeOut);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// static
|
// static
|
||||||
angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
|
angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
|
||||||
const gl::Caps &glCaps,
|
const gl::Caps &glCaps,
|
||||||
bool enableSeamfulCubeMapEmulation,
|
bool enableSubgroupOps,
|
||||||
const gl::ShaderMap<std::string> &shaderSources,
|
const gl::ShaderMap<std::string> &shaderSources,
|
||||||
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
|
gl::ShaderMap<std::vector<uint32_t>> *shaderCodeOut)
|
||||||
{
|
{
|
||||||
|
@ -1005,9 +1003,9 @@ angle::Result GlslangWrapper::GetShaderCodeImpl(vk::Context *context,
|
||||||
glslang::TShader *shader = shaders[shaderType];
|
glslang::TShader *shader = shaders[shaderType];
|
||||||
shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
|
shader->setStringsWithLengths(&shaderString, &shaderLength, 1);
|
||||||
shader->setEntryPoint("main");
|
shader->setEntryPoint("main");
|
||||||
if (enableSeamfulCubeMapEmulation)
|
if (enableSubgroupOps)
|
||||||
{
|
{
|
||||||
// Enable SPIR-V 1.3 if this workaround is used, as it uses subgroup operations.
|
// Enable SPIR-V 1.3 to be able to use subgroup operations.
|
||||||
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
|
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetSpv_1_3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,14 +29,14 @@ class GlslangWrapper
|
||||||
static angle::Result GetShaderCode(vk::Context *context,
|
static angle::Result GetShaderCode(vk::Context *context,
|
||||||
const gl::Caps &glCaps,
|
const gl::Caps &glCaps,
|
||||||
bool enableLineRasterEmulation,
|
bool enableLineRasterEmulation,
|
||||||
bool enableSeamfulCubeMapEmulation,
|
bool enableSubgroupOps,
|
||||||
const gl::ShaderMap<std::string> &shaderSources,
|
const gl::ShaderMap<std::string> &shaderSources,
|
||||||
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
|
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static angle::Result GetShaderCodeImpl(vk::Context *context,
|
static angle::Result GetShaderCodeImpl(vk::Context *context,
|
||||||
const gl::Caps &glCaps,
|
const gl::Caps &glCaps,
|
||||||
bool enableSeamfulCubeMapEmulation,
|
bool enableSubgroupOps,
|
||||||
const gl::ShaderMap<std::string> &shaderSources,
|
const gl::ShaderMap<std::string> &shaderSources,
|
||||||
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
|
gl::ShaderMap<std::vector<uint32_t>> *shaderCodesOut);
|
||||||
};
|
};
|
||||||
|
|
|
@ -304,10 +304,16 @@ angle::Result ProgramVk::ShaderInfo::initShaders(ContextVk *contextVk,
|
||||||
{
|
{
|
||||||
ASSERT(!valid());
|
ASSERT(!valid());
|
||||||
|
|
||||||
|
bool useSubgroupOpsWithSeamfulCubeMapEmulation = false;
|
||||||
|
bool emulateSeamfulCubeMapSampling =
|
||||||
|
contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOpsWithSeamfulCubeMapEmulation);
|
||||||
|
bool useSubgroupOps =
|
||||||
|
emulateSeamfulCubeMapSampling && useSubgroupOpsWithSeamfulCubeMapEmulation;
|
||||||
|
|
||||||
gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
|
gl::ShaderMap<std::vector<uint32_t>> shaderCodes;
|
||||||
ANGLE_TRY(GlslangWrapper::GetShaderCode(
|
ANGLE_TRY(GlslangWrapper::GetShaderCode(contextVk, contextVk->getCaps(),
|
||||||
contextVk, contextVk->getCaps(), enableLineRasterEmulation,
|
enableLineRasterEmulation, useSubgroupOps,
|
||||||
contextVk->emulateSeamfulCubeMapSampling(), shaderSources, &shaderCodes));
|
shaderSources, &shaderCodes));
|
||||||
|
|
||||||
for (const gl::ShaderType shaderType : gl::AllShaderTypes())
|
for (const gl::ShaderType shaderType : gl::AllShaderTypes())
|
||||||
{
|
{
|
||||||
|
@ -1437,7 +1443,8 @@ angle::Result ProgramVk::updateTexturesDescriptorSet(ContextVk *contextVk)
|
||||||
|
|
||||||
const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
|
const gl::ActiveTextureArray<vk::TextureUnit> &activeTextures = contextVk->getActiveTextures();
|
||||||
|
|
||||||
bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling();
|
bool useSubgroupOps = false;
|
||||||
|
bool emulateSeamfulCubeMapSampling = contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps);
|
||||||
|
|
||||||
for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size();
|
for (uint32_t textureIndex = 0; textureIndex < mState.getSamplerBindings().size();
|
||||||
++textureIndex)
|
++textureIndex)
|
||||||
|
|
|
@ -1279,6 +1279,7 @@ void RendererVk::initFeatures(const ExtensionNameList &deviceExtensionNames)
|
||||||
|
|
||||||
if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID))
|
if (IsWindows() && IsAMD(mPhysicalDeviceProperties.vendorID))
|
||||||
{
|
{
|
||||||
|
// Disabled on AMD/windows due to buggy behavior.
|
||||||
mFeatures.disallowSeamfulCubeMapEmulation.enabled = true;
|
mFeatures.disallowSeamfulCubeMapEmulation.enabled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,9 +40,17 @@ std::shared_ptr<WaitableCompileEvent> ShaderVk::compile(const gl::Context *conte
|
||||||
compileOptions |= SH_CLAMP_POINT_SIZE;
|
compileOptions |= SH_CLAMP_POINT_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (contextVk->emulateSeamfulCubeMapSampling())
|
bool useSubgroupOps = false;
|
||||||
|
if (contextVk->emulateSeamfulCubeMapSampling(&useSubgroupOps))
|
||||||
{
|
{
|
||||||
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
|
if (useSubgroupOps)
|
||||||
|
{
|
||||||
|
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING_WITH_SUBGROUP_OP;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
compileOptions |= SH_EMULATE_SEAMFUL_CUBE_MAP_SAMPLING;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options);
|
return compileImpl(context, compilerInstance, mData.getSource(), compileOptions | options);
|
||||||
|
|
|
@ -329,25 +329,6 @@
|
||||||
3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL
|
3306 VULKAN ANDROID : dEQP-GLES2.functional.polygon_offset.fixed16_factor_1_slope = FAIL
|
||||||
3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL
|
3307 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.nearest_linear = FAIL
|
||||||
|
|
||||||
// Seamful cubemap sampling failures on Android (due to missing support for subgroupQuad* operations).
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_linear = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_nearest = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.projected.linear_linear = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_nearest = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.mipmap.cube.bias.linear_linear = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_clamp = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_nearest_mirror = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_clamp = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.filtering.linear_mipmap_linear_linear_mirror = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_clamp = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_repeat = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.clamp_mirror = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_clamp = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_repeat = FAIL
|
|
||||||
3243 VULKAN ANDROID : dEQP-GLES2.functional.texture.vertex.cube.wrap.mirror_mirror = FAIL
|
|
||||||
|
|
||||||
// These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors.
|
// These tests also fail on AMD windows driver as it is not allowed to use emulation due to errors.
|
||||||
3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
|
3243 VULKAN WIN AMD : dEQP-GLES2.functional.shaders.texture_functions.vertex.texturecubelod = FAIL
|
||||||
3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
|
3243 VULKAN WIN AMD : dEQP-GLES2.functional.texture.mipmap.cube.basic.linear_nearest = FAIL
|
||||||
|
|
Загрузка…
Ссылка в новой задаче