Mirror of https://github.com/AvaloniaUI/angle.git
Implement PLS on Apple Silicon
Implements a subset of EXT_shader_framebuffer_fetch in the Metal translator that is sufficient to support pixel local storage. Metal's "programmable blending" feature is available on all Apple family GPUs beginning with version 2. Support for non-Apple GPUs will come later via readWrite textures, which can also be made coherent by annotating them with [[raster_order_group(0)]].

Bug: angleproject:7279
Change-Id: Ic74f6c0d21e87eb919e1f487163388d08d126857
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3916794
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Commit-Queue: Chris Dalton <chris@rive.app>
Reviewed-by: Gregg Tavares <gman@chromium.org>
Parent: 04f3ed80f4
Commit: 2d31fe9825
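For orientation, here is a minimal hand-written sketch (not output of this change; the struct, field, and function names are invented for illustration) of the kind of Metal Shading Language this lets the translator produce for a pixel local storage fragment shader. It shows the two ideas from the message: the fragment output struct is also passed in as a fragment input so the shader can read the attachment's current value (programmable blending), and raster_order_group(0) is an optional annotation on the color attachment.

#include <metal_stdlib>
using namespace metal;

struct FragmentOut
{
    // Framebuffer fetch: this field is both read and written. raster_order_group(0) is
    // optional here; programmable blending is already ordered and coherent on Apple GPUs.
    half4 pls0 [[color(0), raster_order_group(0)]];
};

fragment FragmentOut fragmentMain(FragmentOut lastFragmentOut)
{
    FragmentOut result = lastFragmentOut;  // load the current framebuffer value
    result.pls0 += half4(0.25h);           // accumulate into pixel local storage
    return result;
}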
@@ -26,7 +26,7 @@
 
 // Version number for shader translation API.
 // It is incremented every time the API changes.
-#define ANGLE_SH_VERSION 307
+#define ANGLE_SH_VERSION 308
 
 enum ShShaderSpec
 {

@@ -99,10 +99,12 @@ enum class ShFragmentSynchronizationType
 
     FragmentShaderInterlock_NV_GL,
     FragmentShaderOrdering_INTEL_GL,
-    FragmentShaderInterlock_ARB_GL,
+    FragmentShaderInterlock_ARB_GL,  // Also compiles to SPV_EXT_fragment_shader_interlock.
 
     RasterizerOrderViews_D3D,
 
+    RasterOrderGroups_Metal,
+
     InvalidEnum,
     EnumCount = InvalidEnum,
 };

@@ -438,6 +438,11 @@ TranslatorMetalDirect::TranslatorMetalDirect(sh::GLenum type,
                                     TIntermBlock &root,
                                     DriverUniformMetal &driverUniforms)
 {
+    if (!usesSampleMask())
+    {
+        return true;
+    }
+
     // This transformation leaves the tree in an inconsistent state by using a variable that's
     // defined in text, outside of the knowledge of the AST.
     mValidateASTOptions.validateVariableReferences = false;

@@ -856,9 +861,7 @@ bool TranslatorMetalDirect::translateImpl(TInfoSinkBase &sink,
         AddFragDepthEXTDeclaration(*this, *root, symbolTable);
     }
 
-    // Always add sample_mask. It will be guarded by a function constant decided at runtime.
-    bool usesSampleMask = true;
-    if (usesSampleMask)
+    if (usesSampleMask())
     {
         AddSampleMaskDeclaration(*root, symbolTable);
     }

@@ -162,6 +162,10 @@ class TranslatorMetalDirect : public TCompiler
                  const ShCompileOptions &compileOptions,
                  PerformanceDiagnostics *perfDiagnostics) override;
 
+    // The sample mask can't be in our fragment output struct if we read the framebuffer. Luckily,
+    // pixel local storage bans gl_SampleMask, so we can just not use it when PLS is active.
+    bool usesSampleMask() const { return !hasPixelLocalStorageUniforms(); }
+
     // Need to collect variables so that RemoveInactiveInterfaceVariables works.
     bool shouldCollectVariables(const ShCompileOptions &compileOptions) override { return true; }
 

@@ -197,6 +197,7 @@ class GenMetalTraverser : public TIntermTraverser
     size_t mMainUniformBufferIndex = 0;
     size_t mDriverUniformsBindingIndex = 0;
     size_t mUBOArgumentBufferBindingIndex = 0;
+    bool mRasterOrderGroupsSupported = false;
 };
 }  // anonymous namespace
 

@@ -221,7 +222,9 @@ GenMetalTraverser::GenMetalTraverser(const TCompiler &compiler,
       mIdGen(idGen),
       mMainUniformBufferIndex(compileOptions.metal.defaultUniformsBindingIndex),
       mDriverUniformsBindingIndex(compileOptions.metal.driverUniformsBindingIndex),
-      mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex)
+      mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex),
+      mRasterOrderGroupsSupported(compileOptions.pls.fragmentSynchronizationType ==
+                                  ShFragmentSynchronizationType::RasterOrderGroups_Metal)
 {}
 
 void GenMetalTraverser::emitIndentation()

@@ -1064,6 +1067,7 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
             break;
 
         case TQualifier::EvqFragmentOut:
+        case TQualifier::EvqFragmentInOut:
         case TQualifier::EvqFragData:
             if (mPipelineStructs.fragmentOut.external == &parent)
             {

@@ -1080,7 +1084,17 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
                 const TLayoutQualifier &layoutQualifier = type.getLayoutQualifier();
                 size_t index = layoutQualifier.locationsSpecified ? layoutQualifier.location
                                                                   : annotationIndices.color++;
-                mOut << " [[color(" << index << ")]]";
+                mOut << " [[color(" << index << ")";
+                if (mRasterOrderGroupsSupported && qual == TQualifier::EvqFragmentInOut)
+                {
+                    // Put fragment inouts in their own raster order group for better
+                    // parallelism.
+                    // NOTE: this is not required for the reads to be ordered and coherent.
+                    // TODO(anglebug.com/7279): Consider making raster order groups a PLS layout
+                    // qualifier?
+                    mOut << ", raster_order_group(0)";
+                }
+                mOut << "]]";
             }
         }
         break;

@@ -1309,6 +1323,8 @@ void GenMetalTraverser::emitVariableDeclaration(const VarDecl &decl,
     {
         if (type.isStructSpecifier() && !evdConfig.disableStructSpecifier)
         {
+            // It's invalid to declare a struct inside a function argument. When emitting a
+            // function parameter, the callsite should set evdConfig.disableStructSpecifier.
             ASSERT(!evdConfig.isParameter);
             emitStructDeclaration(type);
             if (symbolType != SymbolType::Empty)

@@ -1813,12 +1829,13 @@ void GenMetalTraverser::emitFunctionParameter(const TFunction &func, const TVari
     const TStructure *structure = type.getStruct();
 
     EmitVariableDeclarationConfig evdConfig;
-    evdConfig.isParameter = true;
-    evdConfig.isMainParameter = isMain;
-    evdConfig.emitPostQualifier = isMain;
-    evdConfig.isUBO = mSymbolEnv.isUBO(param);
-    evdConfig.isPointer = mSymbolEnv.isPointer(param);
-    evdConfig.isReference = mSymbolEnv.isReference(param);
+    evdConfig.isParameter = true;
+    evdConfig.disableStructSpecifier = true;  // It's invalid to declare a struct in a function arg.
+    evdConfig.isMainParameter = isMain;
+    evdConfig.emitPostQualifier = isMain;
+    evdConfig.isUBO = mSymbolEnv.isUBO(param);
+    evdConfig.isPointer = mSymbolEnv.isPointer(param);
+    evdConfig.isReference = mSymbolEnv.isReference(param);
     emitVariableDeclaration(VarDecl(param), evdConfig);
 
     if (isMain)

@@ -74,6 +74,7 @@ bool Pipeline::uses(const TVariable &var) const
     switch (qualifier)
     {
         case TQualifier::EvqFragmentOut:
+        case TQualifier::EvqFragmentInOut:
         case TQualifier::EvqFragColor:
         case TQualifier::EvqFragData:
         case TQualifier::EvqFragDepth:

@@ -348,6 +348,9 @@ class PipelineFunctionEnv
 
     std::unordered_map<const TFunction *, const TFunction *> mFuncMap;
 
+    // Optional expression with which to initialize mPipelineMainLocalVar.
+    TIntermTyped *mPipelineInitExpr = nullptr;
+
   public:
     PipelineFunctionEnv(TCompiler &compiler,
                         SymbolEnv &symbolEnv,

@@ -397,6 +400,20 @@ class PipelineFunctionEnv
             ASSERT(func.getReturnType().getBasicType() == TBasicType::EbtVoid);
             newFunc = &CloneFunctionAndChangeReturnType(mSymbolTable, nullptr, func,
                                                         *mPipelineStruct.external);
+            if (mPipeline.type == Pipeline::Type::FragmentOut &&
+                mCompiler.hasPixelLocalStorageUniforms())
+            {
+                // Add an input argument to main() that contains the current framebuffer
+                // attachment values, for loading pixel local storage.
+                TType *type = new TType(mPipelineStruct.external, true);
+                TVariable *lastFragmentOut =
+                    new TVariable(&mSymbolTable, ImmutableString("lastFragmentOut"), type,
+                                  SymbolType::AngleInternal);
+                newFunc = &CloneFunctionAndPrependParam(mSymbolTable, nullptr, *newFunc,
+                                                        *lastFragmentOut);
+                // Initialize the main local variable with the current framebuffer contents.
+                mPipelineInitExpr = new TIntermSymbol(lastFragmentOut);
+            }
         }
         else if (isMain && (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
                             mPipeline.type == Pipeline::Type::InvocationFragmentGlobals))

@@ -546,6 +563,9 @@ class PipelineFunctionEnv
         const TFunction &newFunc = getUpdatedFunction(func);
         return new TIntermFunctionPrototype(&newFunc);
     }
+
+    // If not null, this is the value we need to initialize the pipeline main local variable with.
+    TIntermTyped *getOptionalPipelineInitExpr() { return mPipelineInitExpr; }
 };
 
 class UpdatePipelineFunctions : private TIntermRebuild

@@ -767,7 +787,8 @@ class UpdatePipelineFunctions : private TIntermRebuild
             ASSERT(mPipelineMainLocalVar.isTotallyFull());
 
             auto *newBody = new TIntermBlock();
-            newBody->appendStatement(new TIntermDeclaration{mPipelineMainLocalVar.internal});
+            newBody->appendStatement(new TIntermDeclaration(mPipelineMainLocalVar.internal,
+                                                            mEnv.getOptionalPipelineInitExpr()));
 
             if (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
                 mPipeline.type == Pipeline::Type::InvocationFragmentGlobals)

@@ -776,7 +797,7 @@ class UpdatePipelineFunctions : private TIntermRebuild
             for (const TField *field : mPipelineStruct.external->fields())
             {
                 auto *var = new TVariable(&mSymbolTable, field->name(), field->type(),
-                                          field->symbolType());
+                                          field->symbolType());
                 auto *symbol = new TIntermSymbol(var);
                 auto &accessNode = AccessField(*mPipelineMainLocalVar.internal, var->name());
                 auto *assignNode = new TIntermBinary(TOperator::EOpAssign, &accessNode, symbol);

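In generated terms, the rewrite above clones the translated main function, prepends a parameter holding the incoming attachment values, and uses it to initialize the function's local copy of the fragment output struct, so pixel local storage loads see the framebuffer's current contents. A minimal hand-written sketch of that shape in Metal Shading Language follows; the identifiers (ANGLE_FragmentOut, main0, ANGLE_fragmentOut) are invented here, not the translator's real generated names.

#include <metal_stdlib>
using namespace metal;

struct ANGLE_FragmentOut
{
    half4 pls0 [[color(0)]];
};

fragment ANGLE_FragmentOut main0(ANGLE_FragmentOut lastFragmentOut)
{
    // Previously the pipeline local variable was default-initialized; with PLS active it is
    // seeded from the incoming attachment values (framebuffer fetch).
    ANGLE_FragmentOut ANGLE_fragmentOut = lastFragmentOut;

    // ... the rewritten body of the original main() operates on ANGLE_fragmentOut ...

    return ANGLE_fragmentOut;
}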
@@ -121,6 +121,7 @@ class DisplayMtl : public DisplayImpl
     const gl::Extensions &getNativeExtensions() const;
     const gl::Limitations &getNativeLimitations() const;
     ShPixelLocalStorageType getNativePixelLocalStorageType() const;
+    ShFragmentSynchronizationType getPLSSynchronizationType() const;
     const angle::FeaturesMtl &getFeatures() const { return mFeatures; }
 
     // Check whether either of the specified iOS or Mac GPU family is supported

@@ -204,6 +205,11 @@ class DisplayMtl : public DisplayImpl
     mutable gl::Limitations mNativeLimitations;
     mutable uint32_t mMaxColorTargetBits = 0;
 
+    // GL_ANGLE_shader_pixel_local_storage.
+    mutable ShPixelLocalStorageType mPixelLocalStorageType = ShPixelLocalStorageType::NotSupported;
+    mutable ShFragmentSynchronizationType mPLSSynchronizationType =
+        ShFragmentSynchronizationType::NotSupported;
+
     angle::FeaturesMtl mFeatures;
 };
 

@@ -676,8 +676,13 @@ const gl::Limitations &DisplayMtl::getNativeLimitations() const
 }
 ShPixelLocalStorageType DisplayMtl::getNativePixelLocalStorageType() const
 {
-    // PLS isn't supported on Metal yet.
-    return ShPixelLocalStorageType::NotSupported;
+    ensureCapsInitialized();
+    return mPixelLocalStorageType;
+}
+ShFragmentSynchronizationType DisplayMtl::getPLSSynchronizationType() const
+{
+    ensureCapsInitialized();
+    return mPLSSynchronizationType;
 }
 
 void DisplayMtl::ensureCapsInitialized() const

@@ -1021,6 +1026,44 @@ void DisplayMtl::initializeExtensions() const
     // Metal uses the opposite provoking vertex as GLES so emulation is required to use the GLES
     // behaviour. Allow users to change the provoking vertex for improved performance.
     mNativeExtensions.provokingVertexANGLE = true;
+
+    // GL_ANGLE_shader_pixel_local_storage.
+    if (supportsAppleGPUFamily(2))
+    {
+        // Programmable blending starts in Apple GPU family 2, and is always coherent.
+        mPixelLocalStorageType = ShPixelLocalStorageType::FramebufferFetch;
+
+        // Raster order groups are NOT required to make framebuffer fetch coherent, however, they
+        // may improve performance by allowing finer grained synchronization (e.g., by assigning
+        // attachments to different raster order groups if they don't depend on each other).
+        bool rasterOrderGroupsSupported = supportsAppleGPUFamily(4);
+        mPLSSynchronizationType = rasterOrderGroupsSupported
+                                      ? ShFragmentSynchronizationType::RasterOrderGroups_Metal
+                                      : ShFragmentSynchronizationType::Automatic;
+
+        mNativeExtensions.shaderPixelLocalStorageANGLE = true;
+        mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = true;
+    }
+    else
+    {
+        // TODO(anglebug.com/7279): Implement PLS shader images.
+        // MTLReadWriteTextureTier readWriteTextureTier = [mMetalDevice readWriteTextureSupport];
+        // if (readWriteTextureTier != MTLReadWriteTextureTierNone)
+        // {
+        //     mPixelLocalStorageType = (readWriteTextureTier == MTLReadWriteTextureTier1)
+        //                                  ? ShPixelLocalStorageType::ImageStoreR32PackedFormats
+        //                                  : ShPixelLocalStorageType::ImageStoreNativeFormats;
+        //
+        //     // Raster order groups are required to make PLS coherent via readWrite textures.
+        //     bool rasterOrderGroupsSupported = [mMetalDevice areRasterOrderGroupsSupported];
+        //     mPLSSynchronizationType = rasterOrderGroupsSupported
+        //                                   ? ShFragmentSynchronizationType::RasterOrderGroups_Metal
+        //                                   : ShFragmentSynchronizationType::NotSupported;
+        //
+        //     mNativeExtensions.shaderPixelLocalStorageANGLE = true;
+        //     mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = rasterOrderGroupsSupported;
+        // }
+    }
 }
 
 void DisplayMtl::initializeTextureCaps() const

@@ -103,7 +103,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
                                                          gl::ShCompilerInstance *compilerInstance,
                                                          ShCompileOptions *options)
 {
     ContextMtl *contextMtl = mtl::GetImpl(context);
+    DisplayMtl *displayMtl = contextMtl->getDisplay();
+
     options->initializeUninitializedLocals = true;
 
     if (context->isWebGL() && mState.getShaderType() != gl::ShaderType::Compute)

@@ -111,7 +113,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
         options->initOutputVariables = true;
     }
 
-    if (contextMtl->getDisplay()->getFeatures().intelExplicitBoolCastWorkaround.enabled)
+    if (displayMtl->getFeatures().intelExplicitBoolCastWorkaround.enabled)
     {
         options->addExplicitBoolCasts = true;
     }

@@ -121,7 +123,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
     options->clampFragDepth = true;
 #endif
 
-    if (contextMtl->getDisplay()->getFeatures().rewriteRowMajorMatrices.enabled)
+    if (displayMtl->getFeatures().rewriteRowMajorMatrices.enabled)
     {
         options->rewriteRowMajorMatrices = true;
     }

@@ -137,6 +139,13 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
     options->metal.defaultUniformsBindingIndex = mtl::kDefaultUniformsBindingIndex;
     options->metal.UBOArgumentBufferBindingIndex = mtl::kUBOArgumentBufferBindingIndex;
 
+    // GL_ANGLE_shader_pixel_local_storage.
+    if (displayMtl->getNativeExtensions().shaderPixelLocalStorageANGLE)
+    {
+        options->pls.type = displayMtl->getNativePixelLocalStorageType();
+        options->pls.fragmentSynchronizationType = displayMtl->getPLSSynchronizationType();
+    }
+
     return compileImplMtl(context, compilerInstance, getState().getSource(), options);
 }
 

@@ -1971,7 +1971,7 @@ TEST_P(PixelLocalStorageTest, LeakFramebufferAndTexture)
 
 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
 #define PLATFORM(API, BACKEND) API##_##BACKEND()
-#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) \
+#define PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API, ...) \
     ANGLE_INSTANTIATE_TEST( \
         TEST, \
         PLATFORM(API, D3D11) /* D3D coherent. */ \

@@ -2022,8 +2022,14 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
         PLATFORM(API, VULKAN_SWIFTSHADER) /* Test PLS not having access to \
                                              glEnablei/glDisablei/glColorMaski. */ \
             .enable(Feature::EmulatePixelLocalStorage) \
-            .enable(Feature::DisableDrawBuffersIndexed))
-PLS_INSTANTIATE_RENDERING_TEST(PixelLocalStorageTest, ES3);
+            .enable(Feature::DisableDrawBuffersIndexed), \
+        __VA_ARGS__)
+
+#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API)
+
+PLS_INSTANTIATE_RENDERING_TEST_AND(PixelLocalStorageTest,
+                                   ES3,
+                                   ES3_METAL().enable(Feature::EmulatePixelLocalStorage));
 
 class PixelLocalStorageTestES31 : public PixelLocalStorageTest
 {};