Implement PLS on Apple Silicon

Implements a subset of EXT_shader_framebuffer_fetch in the Metal
translator that is sufficient to support pixel local storage. Metal's
"programmable blending" feature is available on all Apple family GPUs
beginning with version 2.

Support for non-Apple GPUs will come later via readWrite textures,
which can also be made coherent by annotating them with
[[raster_order_group(0)]].

Bug: angleproject:7279
Change-Id: Ic74f6c0d21e87eb919e1f487163388d08d126857
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3916794
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Commit-Queue: Chris Dalton <chris@rive.app>
Reviewed-by: Gregg Tavares <gman@chromium.org>
This commit is contained in:
Chris Dalton 2022-09-22 21:04:22 -06:00 коммит произвёл Angle LUCI CQ
Родитель 04f3ed80f4
Коммит 2d31fe9825
10 изменённых файлов: 135 добавлений и 23 удалений

Просмотреть файл

@ -26,7 +26,7 @@
// Version number for shader translation API. // Version number for shader translation API.
// It is incremented every time the API changes. // It is incremented every time the API changes.
#define ANGLE_SH_VERSION 307 #define ANGLE_SH_VERSION 308
enum ShShaderSpec enum ShShaderSpec
{ {
@ -99,10 +99,12 @@ enum class ShFragmentSynchronizationType
FragmentShaderInterlock_NV_GL, FragmentShaderInterlock_NV_GL,
FragmentShaderOrdering_INTEL_GL, FragmentShaderOrdering_INTEL_GL,
FragmentShaderInterlock_ARB_GL, FragmentShaderInterlock_ARB_GL, // Also compiles to SPV_EXT_fragment_shader_interlock.
RasterizerOrderViews_D3D, RasterizerOrderViews_D3D,
RasterOrderGroups_Metal,
InvalidEnum, InvalidEnum,
EnumCount = InvalidEnum, EnumCount = InvalidEnum,
}; };

Просмотреть файл

@ -438,6 +438,11 @@ TranslatorMetalDirect::TranslatorMetalDirect(sh::GLenum type,
TIntermBlock &root, TIntermBlock &root,
DriverUniformMetal &driverUniforms) DriverUniformMetal &driverUniforms)
{ {
if (!usesSampleMask())
{
return true;
}
// This transformation leaves the tree in an inconsistent state by using a variable that's // This transformation leaves the tree in an inconsistent state by using a variable that's
// defined in text, outside of the knowledge of the AST. // defined in text, outside of the knowledge of the AST.
mValidateASTOptions.validateVariableReferences = false; mValidateASTOptions.validateVariableReferences = false;
@ -856,9 +861,7 @@ bool TranslatorMetalDirect::translateImpl(TInfoSinkBase &sink,
AddFragDepthEXTDeclaration(*this, *root, symbolTable); AddFragDepthEXTDeclaration(*this, *root, symbolTable);
} }
// Always add sample_mask. It will be guarded by a function constant decided at runtime. if (usesSampleMask())
bool usesSampleMask = true;
if (usesSampleMask)
{ {
AddSampleMaskDeclaration(*root, symbolTable); AddSampleMaskDeclaration(*root, symbolTable);
} }

Просмотреть файл

@ -162,6 +162,10 @@ class TranslatorMetalDirect : public TCompiler
const ShCompileOptions &compileOptions, const ShCompileOptions &compileOptions,
PerformanceDiagnostics *perfDiagnostics) override; PerformanceDiagnostics *perfDiagnostics) override;
// The sample mask can't be in our fragment output struct if we read the framebuffer. Luckily,
// pixel local storage bans gl_SampleMask, so we can just not use it when PLS is active.
bool usesSampleMask() const { return !hasPixelLocalStorageUniforms(); }
// Need to collect variables so that RemoveInactiveInterfaceVariables works. // Need to collect variables so that RemoveInactiveInterfaceVariables works.
bool shouldCollectVariables(const ShCompileOptions &compileOptions) override { return true; } bool shouldCollectVariables(const ShCompileOptions &compileOptions) override { return true; }

Просмотреть файл

@ -197,6 +197,7 @@ class GenMetalTraverser : public TIntermTraverser
size_t mMainUniformBufferIndex = 0; size_t mMainUniformBufferIndex = 0;
size_t mDriverUniformsBindingIndex = 0; size_t mDriverUniformsBindingIndex = 0;
size_t mUBOArgumentBufferBindingIndex = 0; size_t mUBOArgumentBufferBindingIndex = 0;
bool mRasterOrderGroupsSupported = false;
}; };
} // anonymous namespace } // anonymous namespace
@ -221,7 +222,9 @@ GenMetalTraverser::GenMetalTraverser(const TCompiler &compiler,
mIdGen(idGen), mIdGen(idGen),
mMainUniformBufferIndex(compileOptions.metal.defaultUniformsBindingIndex), mMainUniformBufferIndex(compileOptions.metal.defaultUniformsBindingIndex),
mDriverUniformsBindingIndex(compileOptions.metal.driverUniformsBindingIndex), mDriverUniformsBindingIndex(compileOptions.metal.driverUniformsBindingIndex),
mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex) mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex),
mRasterOrderGroupsSupported(compileOptions.pls.fragmentSynchronizationType ==
ShFragmentSynchronizationType::RasterOrderGroups_Metal)
{} {}
void GenMetalTraverser::emitIndentation() void GenMetalTraverser::emitIndentation()
@ -1064,6 +1067,7 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
break; break;
case TQualifier::EvqFragmentOut: case TQualifier::EvqFragmentOut:
case TQualifier::EvqFragmentInOut:
case TQualifier::EvqFragData: case TQualifier::EvqFragData:
if (mPipelineStructs.fragmentOut.external == &parent) if (mPipelineStructs.fragmentOut.external == &parent)
{ {
@ -1080,7 +1084,17 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
const TLayoutQualifier &layoutQualifier = type.getLayoutQualifier(); const TLayoutQualifier &layoutQualifier = type.getLayoutQualifier();
size_t index = layoutQualifier.locationsSpecified ? layoutQualifier.location size_t index = layoutQualifier.locationsSpecified ? layoutQualifier.location
: annotationIndices.color++; : annotationIndices.color++;
mOut << " [[color(" << index << ")]]"; mOut << " [[color(" << index << ")";
if (mRasterOrderGroupsSupported && qual == TQualifier::EvqFragmentInOut)
{
// Put fragment inouts in their own raster order group for better
// parallelism.
// NOTE: this is not required for the reads to be ordered and coherent.
// TODO(anglebug.com/7279): Consider making raster order groups a PLS layout
// qualifier?
mOut << ", raster_order_group(0)";
}
mOut << "]]";
} }
} }
break; break;
@ -1309,6 +1323,8 @@ void GenMetalTraverser::emitVariableDeclaration(const VarDecl &decl,
{ {
if (type.isStructSpecifier() && !evdConfig.disableStructSpecifier) if (type.isStructSpecifier() && !evdConfig.disableStructSpecifier)
{ {
// It's invalid to declare a struct inside a function argument. When emitting a
// function parameter, the callsite should set evdConfig.disableStructSpecifier.
ASSERT(!evdConfig.isParameter); ASSERT(!evdConfig.isParameter);
emitStructDeclaration(type); emitStructDeclaration(type);
if (symbolType != SymbolType::Empty) if (symbolType != SymbolType::Empty)
@ -1813,12 +1829,13 @@ void GenMetalTraverser::emitFunctionParameter(const TFunction &func, const TVari
const TStructure *structure = type.getStruct(); const TStructure *structure = type.getStruct();
EmitVariableDeclarationConfig evdConfig; EmitVariableDeclarationConfig evdConfig;
evdConfig.isParameter = true; evdConfig.isParameter = true;
evdConfig.isMainParameter = isMain; evdConfig.disableStructSpecifier = true; // It's invalid to declare a struct in a function arg.
evdConfig.emitPostQualifier = isMain; evdConfig.isMainParameter = isMain;
evdConfig.isUBO = mSymbolEnv.isUBO(param); evdConfig.emitPostQualifier = isMain;
evdConfig.isPointer = mSymbolEnv.isPointer(param); evdConfig.isUBO = mSymbolEnv.isUBO(param);
evdConfig.isReference = mSymbolEnv.isReference(param); evdConfig.isPointer = mSymbolEnv.isPointer(param);
evdConfig.isReference = mSymbolEnv.isReference(param);
emitVariableDeclaration(VarDecl(param), evdConfig); emitVariableDeclaration(VarDecl(param), evdConfig);
if (isMain) if (isMain)

Просмотреть файл

@ -74,6 +74,7 @@ bool Pipeline::uses(const TVariable &var) const
switch (qualifier) switch (qualifier)
{ {
case TQualifier::EvqFragmentOut: case TQualifier::EvqFragmentOut:
case TQualifier::EvqFragmentInOut:
case TQualifier::EvqFragColor: case TQualifier::EvqFragColor:
case TQualifier::EvqFragData: case TQualifier::EvqFragData:
case TQualifier::EvqFragDepth: case TQualifier::EvqFragDepth:

Просмотреть файл

@ -348,6 +348,9 @@ class PipelineFunctionEnv
std::unordered_map<const TFunction *, const TFunction *> mFuncMap; std::unordered_map<const TFunction *, const TFunction *> mFuncMap;
// Optional expression with which to initialize mPipelineMainLocalVar.
TIntermTyped *mPipelineInitExpr = nullptr;
public: public:
PipelineFunctionEnv(TCompiler &compiler, PipelineFunctionEnv(TCompiler &compiler,
SymbolEnv &symbolEnv, SymbolEnv &symbolEnv,
@ -397,6 +400,20 @@ class PipelineFunctionEnv
ASSERT(func.getReturnType().getBasicType() == TBasicType::EbtVoid); ASSERT(func.getReturnType().getBasicType() == TBasicType::EbtVoid);
newFunc = &CloneFunctionAndChangeReturnType(mSymbolTable, nullptr, func, newFunc = &CloneFunctionAndChangeReturnType(mSymbolTable, nullptr, func,
*mPipelineStruct.external); *mPipelineStruct.external);
if (mPipeline.type == Pipeline::Type::FragmentOut &&
mCompiler.hasPixelLocalStorageUniforms())
{
// Add an input argument to main() that contains the current framebuffer
// attachment values, for loading pixel local storage.
TType *type = new TType(mPipelineStruct.external, true);
TVariable *lastFragmentOut =
new TVariable(&mSymbolTable, ImmutableString("lastFragmentOut"), type,
SymbolType::AngleInternal);
newFunc = &CloneFunctionAndPrependParam(mSymbolTable, nullptr, *newFunc,
*lastFragmentOut);
// Initialize the main local variable with the current framebuffer contents.
mPipelineInitExpr = new TIntermSymbol(lastFragmentOut);
}
} }
else if (isMain && (mPipeline.type == Pipeline::Type::InvocationVertexGlobals || else if (isMain && (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
mPipeline.type == Pipeline::Type::InvocationFragmentGlobals)) mPipeline.type == Pipeline::Type::InvocationFragmentGlobals))
@ -546,6 +563,9 @@ class PipelineFunctionEnv
const TFunction &newFunc = getUpdatedFunction(func); const TFunction &newFunc = getUpdatedFunction(func);
return new TIntermFunctionPrototype(&newFunc); return new TIntermFunctionPrototype(&newFunc);
} }
// If not null, this is the value we need to initialize the pipeline main local variable with.
TIntermTyped *getOptionalPipelineInitExpr() { return mPipelineInitExpr; }
}; };
class UpdatePipelineFunctions : private TIntermRebuild class UpdatePipelineFunctions : private TIntermRebuild
@ -767,7 +787,8 @@ class UpdatePipelineFunctions : private TIntermRebuild
ASSERT(mPipelineMainLocalVar.isTotallyFull()); ASSERT(mPipelineMainLocalVar.isTotallyFull());
auto *newBody = new TIntermBlock(); auto *newBody = new TIntermBlock();
newBody->appendStatement(new TIntermDeclaration{mPipelineMainLocalVar.internal}); newBody->appendStatement(new TIntermDeclaration(mPipelineMainLocalVar.internal,
mEnv.getOptionalPipelineInitExpr()));
if (mPipeline.type == Pipeline::Type::InvocationVertexGlobals || if (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
mPipeline.type == Pipeline::Type::InvocationFragmentGlobals) mPipeline.type == Pipeline::Type::InvocationFragmentGlobals)
@ -776,7 +797,7 @@ class UpdatePipelineFunctions : private TIntermRebuild
for (const TField *field : mPipelineStruct.external->fields()) for (const TField *field : mPipelineStruct.external->fields())
{ {
auto *var = new TVariable(&mSymbolTable, field->name(), field->type(), auto *var = new TVariable(&mSymbolTable, field->name(), field->type(),
field->symbolType()); field->symbolType());
auto *symbol = new TIntermSymbol(var); auto *symbol = new TIntermSymbol(var);
auto &accessNode = AccessField(*mPipelineMainLocalVar.internal, var->name()); auto &accessNode = AccessField(*mPipelineMainLocalVar.internal, var->name());
auto *assignNode = new TIntermBinary(TOperator::EOpAssign, &accessNode, symbol); auto *assignNode = new TIntermBinary(TOperator::EOpAssign, &accessNode, symbol);

Просмотреть файл

@ -121,6 +121,7 @@ class DisplayMtl : public DisplayImpl
const gl::Extensions &getNativeExtensions() const; const gl::Extensions &getNativeExtensions() const;
const gl::Limitations &getNativeLimitations() const; const gl::Limitations &getNativeLimitations() const;
ShPixelLocalStorageType getNativePixelLocalStorageType() const; ShPixelLocalStorageType getNativePixelLocalStorageType() const;
ShFragmentSynchronizationType getPLSSynchronizationType() const;
const angle::FeaturesMtl &getFeatures() const { return mFeatures; } const angle::FeaturesMtl &getFeatures() const { return mFeatures; }
// Check whether either of the specified iOS or Mac GPU family is supported // Check whether either of the specified iOS or Mac GPU family is supported
@ -204,6 +205,11 @@ class DisplayMtl : public DisplayImpl
mutable gl::Limitations mNativeLimitations; mutable gl::Limitations mNativeLimitations;
mutable uint32_t mMaxColorTargetBits = 0; mutable uint32_t mMaxColorTargetBits = 0;
// GL_ANGLE_shader_pixel_local_storage.
mutable ShPixelLocalStorageType mPixelLocalStorageType = ShPixelLocalStorageType::NotSupported;
mutable ShFragmentSynchronizationType mPLSSynchronizationType =
ShFragmentSynchronizationType::NotSupported;
angle::FeaturesMtl mFeatures; angle::FeaturesMtl mFeatures;
}; };

Просмотреть файл

@ -676,8 +676,13 @@ const gl::Limitations &DisplayMtl::getNativeLimitations() const
} }
ShPixelLocalStorageType DisplayMtl::getNativePixelLocalStorageType() const ShPixelLocalStorageType DisplayMtl::getNativePixelLocalStorageType() const
{ {
// PLS isn't supported on Metal yet. ensureCapsInitialized();
return ShPixelLocalStorageType::NotSupported; return mPixelLocalStorageType;
}
ShFragmentSynchronizationType DisplayMtl::getPLSSynchronizationType() const
{
ensureCapsInitialized();
return mPLSSynchronizationType;
} }
void DisplayMtl::ensureCapsInitialized() const void DisplayMtl::ensureCapsInitialized() const
@ -1021,6 +1026,44 @@ void DisplayMtl::initializeExtensions() const
// Metal uses the opposite provoking vertex as GLES so emulation is required to use the GLES // Metal uses the opposite provoking vertex as GLES so emulation is required to use the GLES
// behaviour. Allow users to change the provoking vertex for improved performance. // behaviour. Allow users to change the provoking vertex for improved performance.
mNativeExtensions.provokingVertexANGLE = true; mNativeExtensions.provokingVertexANGLE = true;
// GL_ANGLE_shader_pixel_local_storage.
if (supportsAppleGPUFamily(2))
{
// Programmable blending starts in Apple GPU family 2, and is always coherent.
mPixelLocalStorageType = ShPixelLocalStorageType::FramebufferFetch;
// Raster order groups are NOT required to make framebuffer fetch coherent, however, they
// may improve performance by allowing finer grained synchronization (e.g., by assigning
// attachments to different raster order groups if they don't depend on each other).
bool rasterOrderGroupsSupported = supportsAppleGPUFamily(4);
mPLSSynchronizationType = rasterOrderGroupsSupported
? ShFragmentSynchronizationType::RasterOrderGroups_Metal
: ShFragmentSynchronizationType::Automatic;
mNativeExtensions.shaderPixelLocalStorageANGLE = true;
mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = true;
}
else
{
// TODO(anglebug.com/7279): Implement PLS shader images.
// MTLReadWriteTextureTier readWriteTextureTier = [mMetalDevice readWriteTextureSupport];
// if (readWriteTextureTier != MTLReadWriteTextureTierNone)
// {
// mPixelLocalStorageType = (readWriteTextureTier == MTLReadWriteTextureTier1)
// ? ShPixelLocalStorageType::ImageStoreR32PackedFormats
// : ShPixelLocalStorageType::ImageStoreNativeFormats;
//
// // Raster order groups are required to make PLS coherent via readWrite textures.
// bool rasterOrderGroupsSupported = [mMetalDevice areRasterOrderGroupsSupported];
// mPLSSynchronizationType = rasterOrderGroupsSupported
// ? ShFragmentSynchronizationType::RasterOrderGroups_Metal
// : ShFragmentSynchronizationType::NotSupported;
//
// mNativeExtensions.shaderPixelLocalStorageANGLE = true;
// mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = rasterOrderGroupsSupported;
// }
}
} }
void DisplayMtl::initializeTextureCaps() const void DisplayMtl::initializeTextureCaps() const

Просмотреть файл

@ -103,7 +103,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
gl::ShCompilerInstance *compilerInstance, gl::ShCompilerInstance *compilerInstance,
ShCompileOptions *options) ShCompileOptions *options)
{ {
ContextMtl *contextMtl = mtl::GetImpl(context); ContextMtl *contextMtl = mtl::GetImpl(context);
DisplayMtl *displayMtl = contextMtl->getDisplay();
options->initializeUninitializedLocals = true; options->initializeUninitializedLocals = true;
if (context->isWebGL() && mState.getShaderType() != gl::ShaderType::Compute) if (context->isWebGL() && mState.getShaderType() != gl::ShaderType::Compute)
@ -111,7 +113,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->initOutputVariables = true; options->initOutputVariables = true;
} }
if (contextMtl->getDisplay()->getFeatures().intelExplicitBoolCastWorkaround.enabled) if (displayMtl->getFeatures().intelExplicitBoolCastWorkaround.enabled)
{ {
options->addExplicitBoolCasts = true; options->addExplicitBoolCasts = true;
} }
@ -121,7 +123,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->clampFragDepth = true; options->clampFragDepth = true;
#endif #endif
if (contextMtl->getDisplay()->getFeatures().rewriteRowMajorMatrices.enabled) if (displayMtl->getFeatures().rewriteRowMajorMatrices.enabled)
{ {
options->rewriteRowMajorMatrices = true; options->rewriteRowMajorMatrices = true;
} }
@ -137,6 +139,13 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->metal.defaultUniformsBindingIndex = mtl::kDefaultUniformsBindingIndex; options->metal.defaultUniformsBindingIndex = mtl::kDefaultUniformsBindingIndex;
options->metal.UBOArgumentBufferBindingIndex = mtl::kUBOArgumentBufferBindingIndex; options->metal.UBOArgumentBufferBindingIndex = mtl::kUBOArgumentBufferBindingIndex;
// GL_ANGLE_shader_pixel_local_storage.
if (displayMtl->getNativeExtensions().shaderPixelLocalStorageANGLE)
{
options->pls.type = displayMtl->getNativePixelLocalStorageType();
options->pls.fragmentSynchronizationType = displayMtl->getPLSSynchronizationType();
}
return compileImplMtl(context, compilerInstance, getState().getSource(), options); return compileImplMtl(context, compilerInstance, getState().getSource(), options);
} }

Просмотреть файл

@ -1971,7 +1971,7 @@ TEST_P(PixelLocalStorageTest, LeakFramebufferAndTexture)
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest); GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
#define PLATFORM(API, BACKEND) API##_##BACKEND() #define PLATFORM(API, BACKEND) API##_##BACKEND()
#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) \ #define PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API, ...) \
ANGLE_INSTANTIATE_TEST( \ ANGLE_INSTANTIATE_TEST( \
TEST, \ TEST, \
PLATFORM(API, D3D11) /* D3D coherent. */ \ PLATFORM(API, D3D11) /* D3D coherent. */ \
@ -2022,8 +2022,14 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
PLATFORM(API, VULKAN_SWIFTSHADER) /* Test PLS not having access to \ PLATFORM(API, VULKAN_SWIFTSHADER) /* Test PLS not having access to \
glEnablei/glDisablei/glColorMaski. */ \ glEnablei/glDisablei/glColorMaski. */ \
.enable(Feature::EmulatePixelLocalStorage) \ .enable(Feature::EmulatePixelLocalStorage) \
.enable(Feature::DisableDrawBuffersIndexed)) .enable(Feature::DisableDrawBuffersIndexed), \
PLS_INSTANTIATE_RENDERING_TEST(PixelLocalStorageTest, ES3); __VA_ARGS__)
#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API)
PLS_INSTANTIATE_RENDERING_TEST_AND(PixelLocalStorageTest,
ES3,
ES3_METAL().enable(Feature::EmulatePixelLocalStorage));
class PixelLocalStorageTestES31 : public PixelLocalStorageTest class PixelLocalStorageTestES31 : public PixelLocalStorageTest
{}; {};