Implement PLS on Apple Silicon

Implements a subset of EXT_shader_framebuffer_fetch in the Metal
translator that is sufficient to support pixel local storage. Metal's
"programmable blending" feature is available on all Apple family GPUs
beginning with version 2.

Support for non-Apple GPUs will come later via readWrite textures,
which can also be coherent by annotating them with
[[raster_order_group(0)]].

Bug: angleproject:7279
Change-Id: Ic74f6c0d21e87eb919e1f487163388d08d126857
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3916794
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Commit-Queue: Chris Dalton <chris@rive.app>
Reviewed-by: Gregg Tavares <gman@chromium.org>
This commit is contained in:
Chris Dalton 2022-09-22 21:04:22 -06:00 коммит произвёл Angle LUCI CQ
Родитель 04f3ed80f4
Коммит 2d31fe9825
10 изменённых файлов: 135 добавлений и 23 удалений

Просмотреть файл

@ -26,7 +26,7 @@
// Version number for shader translation API.
// It is incremented every time the API changes.
#define ANGLE_SH_VERSION 307
#define ANGLE_SH_VERSION 308
enum ShShaderSpec
{
@ -99,10 +99,12 @@ enum class ShFragmentSynchronizationType
FragmentShaderInterlock_NV_GL,
FragmentShaderOrdering_INTEL_GL,
FragmentShaderInterlock_ARB_GL,
FragmentShaderInterlock_ARB_GL, // Also compiles to SPV_EXT_fragment_shader_interlock.
RasterizerOrderViews_D3D,
RasterOrderGroups_Metal,
InvalidEnum,
EnumCount = InvalidEnum,
};

Просмотреть файл

@ -438,6 +438,11 @@ TranslatorMetalDirect::TranslatorMetalDirect(sh::GLenum type,
TIntermBlock &root,
DriverUniformMetal &driverUniforms)
{
if (!usesSampleMask())
{
return true;
}
// This transformation leaves the tree in an inconsistent state by using a variable that's
// defined in text, outside of the knowledge of the AST.
mValidateASTOptions.validateVariableReferences = false;
@ -856,9 +861,7 @@ bool TranslatorMetalDirect::translateImpl(TInfoSinkBase &sink,
AddFragDepthEXTDeclaration(*this, *root, symbolTable);
}
// Always add sample_mask. It will be guarded by a function constant decided at runtime.
bool usesSampleMask = true;
if (usesSampleMask)
if (usesSampleMask())
{
AddSampleMaskDeclaration(*root, symbolTable);
}

Просмотреть файл

@ -162,6 +162,10 @@ class TranslatorMetalDirect : public TCompiler
const ShCompileOptions &compileOptions,
PerformanceDiagnostics *perfDiagnostics) override;
// The sample mask can't be in our fragment output struct if we read the framebuffer. Luckily,
// pixel local storage bans gl_SampleMask, so we can just not use it when PLS is active.
bool usesSampleMask() const { return !hasPixelLocalStorageUniforms(); }
// Need to collect variables so that RemoveInactiveInterfaceVariables works.
bool shouldCollectVariables(const ShCompileOptions &compileOptions) override { return true; }

Просмотреть файл

@ -197,6 +197,7 @@ class GenMetalTraverser : public TIntermTraverser
size_t mMainUniformBufferIndex = 0;
size_t mDriverUniformsBindingIndex = 0;
size_t mUBOArgumentBufferBindingIndex = 0;
bool mRasterOrderGroupsSupported = false;
};
} // anonymous namespace
@ -221,7 +222,9 @@ GenMetalTraverser::GenMetalTraverser(const TCompiler &compiler,
mIdGen(idGen),
mMainUniformBufferIndex(compileOptions.metal.defaultUniformsBindingIndex),
mDriverUniformsBindingIndex(compileOptions.metal.driverUniformsBindingIndex),
mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex)
mUBOArgumentBufferBindingIndex(compileOptions.metal.UBOArgumentBufferBindingIndex),
mRasterOrderGroupsSupported(compileOptions.pls.fragmentSynchronizationType ==
ShFragmentSynchronizationType::RasterOrderGroups_Metal)
{}
void GenMetalTraverser::emitIndentation()
@ -1064,6 +1067,7 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
break;
case TQualifier::EvqFragmentOut:
case TQualifier::EvqFragmentInOut:
case TQualifier::EvqFragData:
if (mPipelineStructs.fragmentOut.external == &parent)
{
@ -1080,7 +1084,17 @@ void GenMetalTraverser::emitFieldDeclaration(const TField &field,
const TLayoutQualifier &layoutQualifier = type.getLayoutQualifier();
size_t index = layoutQualifier.locationsSpecified ? layoutQualifier.location
: annotationIndices.color++;
mOut << " [[color(" << index << ")]]";
mOut << " [[color(" << index << ")";
if (mRasterOrderGroupsSupported && qual == TQualifier::EvqFragmentInOut)
{
// Put fragment inouts in their own raster order group for better
// parallelism.
// NOTE: this is not required for the reads to be ordered and coherent.
// TODO(anglebug.com/7279): Consider making raster order groups a PLS layout
// qualifier?
mOut << ", raster_order_group(0)";
}
mOut << "]]";
}
}
break;
@ -1309,6 +1323,8 @@ void GenMetalTraverser::emitVariableDeclaration(const VarDecl &decl,
{
if (type.isStructSpecifier() && !evdConfig.disableStructSpecifier)
{
// It's invalid to declare a struct inside a function argument. When emitting a
// function parameter, the callsite should set evdConfig.disableStructSpecifier.
ASSERT(!evdConfig.isParameter);
emitStructDeclaration(type);
if (symbolType != SymbolType::Empty)
@ -1813,12 +1829,13 @@ void GenMetalTraverser::emitFunctionParameter(const TFunction &func, const TVari
const TStructure *structure = type.getStruct();
EmitVariableDeclarationConfig evdConfig;
evdConfig.isParameter = true;
evdConfig.isMainParameter = isMain;
evdConfig.emitPostQualifier = isMain;
evdConfig.isUBO = mSymbolEnv.isUBO(param);
evdConfig.isPointer = mSymbolEnv.isPointer(param);
evdConfig.isReference = mSymbolEnv.isReference(param);
evdConfig.isParameter = true;
evdConfig.disableStructSpecifier = true; // It's invalid to declare a struct in a function arg.
evdConfig.isMainParameter = isMain;
evdConfig.emitPostQualifier = isMain;
evdConfig.isUBO = mSymbolEnv.isUBO(param);
evdConfig.isPointer = mSymbolEnv.isPointer(param);
evdConfig.isReference = mSymbolEnv.isReference(param);
emitVariableDeclaration(VarDecl(param), evdConfig);
if (isMain)

Просмотреть файл

@ -74,6 +74,7 @@ bool Pipeline::uses(const TVariable &var) const
switch (qualifier)
{
case TQualifier::EvqFragmentOut:
case TQualifier::EvqFragmentInOut:
case TQualifier::EvqFragColor:
case TQualifier::EvqFragData:
case TQualifier::EvqFragDepth:

Просмотреть файл

@ -348,6 +348,9 @@ class PipelineFunctionEnv
std::unordered_map<const TFunction *, const TFunction *> mFuncMap;
// Optional expression with which to initialize mPipelineMainLocalVar.
TIntermTyped *mPipelineInitExpr = nullptr;
public:
PipelineFunctionEnv(TCompiler &compiler,
SymbolEnv &symbolEnv,
@ -397,6 +400,20 @@ class PipelineFunctionEnv
ASSERT(func.getReturnType().getBasicType() == TBasicType::EbtVoid);
newFunc = &CloneFunctionAndChangeReturnType(mSymbolTable, nullptr, func,
*mPipelineStruct.external);
if (mPipeline.type == Pipeline::Type::FragmentOut &&
mCompiler.hasPixelLocalStorageUniforms())
{
// Add an input argument to main() that contains the current framebuffer
// attachment values, for loading pixel local storage.
TType *type = new TType(mPipelineStruct.external, true);
TVariable *lastFragmentOut =
new TVariable(&mSymbolTable, ImmutableString("lastFragmentOut"), type,
SymbolType::AngleInternal);
newFunc = &CloneFunctionAndPrependParam(mSymbolTable, nullptr, *newFunc,
*lastFragmentOut);
// Initialize the main local variable with the current framebuffer contents.
mPipelineInitExpr = new TIntermSymbol(lastFragmentOut);
}
}
else if (isMain && (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
mPipeline.type == Pipeline::Type::InvocationFragmentGlobals))
@ -546,6 +563,9 @@ class PipelineFunctionEnv
const TFunction &newFunc = getUpdatedFunction(func);
return new TIntermFunctionPrototype(&newFunc);
}
// If not null, this is the value we need to initialize the pipeline main local variable with.
TIntermTyped *getOptionalPipelineInitExpr() { return mPipelineInitExpr; }
};
class UpdatePipelineFunctions : private TIntermRebuild
@ -767,7 +787,8 @@ class UpdatePipelineFunctions : private TIntermRebuild
ASSERT(mPipelineMainLocalVar.isTotallyFull());
auto *newBody = new TIntermBlock();
newBody->appendStatement(new TIntermDeclaration{mPipelineMainLocalVar.internal});
newBody->appendStatement(new TIntermDeclaration(mPipelineMainLocalVar.internal,
mEnv.getOptionalPipelineInitExpr()));
if (mPipeline.type == Pipeline::Type::InvocationVertexGlobals ||
mPipeline.type == Pipeline::Type::InvocationFragmentGlobals)
@ -776,7 +797,7 @@ class UpdatePipelineFunctions : private TIntermRebuild
for (const TField *field : mPipelineStruct.external->fields())
{
auto *var = new TVariable(&mSymbolTable, field->name(), field->type(),
field->symbolType());
field->symbolType());
auto *symbol = new TIntermSymbol(var);
auto &accessNode = AccessField(*mPipelineMainLocalVar.internal, var->name());
auto *assignNode = new TIntermBinary(TOperator::EOpAssign, &accessNode, symbol);

Просмотреть файл

@ -121,6 +121,7 @@ class DisplayMtl : public DisplayImpl
const gl::Extensions &getNativeExtensions() const;
const gl::Limitations &getNativeLimitations() const;
ShPixelLocalStorageType getNativePixelLocalStorageType() const;
ShFragmentSynchronizationType getPLSSynchronizationType() const;
const angle::FeaturesMtl &getFeatures() const { return mFeatures; }
// Check whether either of the specified iOS or Mac GPU family is supported
@ -204,6 +205,11 @@ class DisplayMtl : public DisplayImpl
mutable gl::Limitations mNativeLimitations;
mutable uint32_t mMaxColorTargetBits = 0;
// GL_ANGLE_shader_pixel_local_storage.
mutable ShPixelLocalStorageType mPixelLocalStorageType = ShPixelLocalStorageType::NotSupported;
mutable ShFragmentSynchronizationType mPLSSynchronizationType =
ShFragmentSynchronizationType::NotSupported;
angle::FeaturesMtl mFeatures;
};

Просмотреть файл

@ -676,8 +676,13 @@ const gl::Limitations &DisplayMtl::getNativeLimitations() const
}
ShPixelLocalStorageType DisplayMtl::getNativePixelLocalStorageType() const
{
// PLS isn't supported on Metal yet.
return ShPixelLocalStorageType::NotSupported;
ensureCapsInitialized();
return mPixelLocalStorageType;
}
ShFragmentSynchronizationType DisplayMtl::getPLSSynchronizationType() const
{
ensureCapsInitialized();
return mPLSSynchronizationType;
}
void DisplayMtl::ensureCapsInitialized() const
@ -1021,6 +1026,44 @@ void DisplayMtl::initializeExtensions() const
// Metal uses the opposite provoking vertex as GLES so emulation is required to use the GLES
// behaviour. Allow users to change the provoking vertex for improved performance.
mNativeExtensions.provokingVertexANGLE = true;
// GL_ANGLE_shader_pixel_local_storage.
if (supportsAppleGPUFamily(2))
{
// Programmable blending starts in Apple GPU family 2, and is always coherent.
mPixelLocalStorageType = ShPixelLocalStorageType::FramebufferFetch;
// Raster order groups are NOT required to make framebuffer fetch coherent, however, they
// may improve performance by allowing finer grained synchronization (e.g., by assigning
// attachments to different raster order groups if they don't depend on each other).
bool rasterOrderGroupsSupported = supportsAppleGPUFamily(4);
mPLSSynchronizationType = rasterOrderGroupsSupported
? ShFragmentSynchronizationType::RasterOrderGroups_Metal
: ShFragmentSynchronizationType::Automatic;
mNativeExtensions.shaderPixelLocalStorageANGLE = true;
mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = true;
}
else
{
// TODO(anglebug.com/7279): Implement PLS shader images.
// MTLReadWriteTextureTier readWriteTextureTier = [mMetalDevice readWriteTextureSupport];
// if (readWriteTextureTier != MTLReadWriteTextureTierNone)
// {
// mPixelLocalStorageType = (readWriteTextureTier == MTLReadWriteTextureTier1)
// ? ShPixelLocalStorageType::ImageStoreR32PackedFormats
// : ShPixelLocalStorageType::ImageStoreNativeFormats;
//
// // Raster order groups are required to make PLS coherent via readWrite textures.
// bool rasterOrderGroupsSupported = [mMetalDevice areRasterOrderGroupsSupported];
// mPLSSynchronizationType = rasterOrderGroupsSupported
// ? ShFragmentSynchronizationType::RasterOrderGroups_Metal
// : ShFragmentSynchronizationType::NotSupported;
//
// mNativeExtensions.shaderPixelLocalStorageANGLE = true;
// mNativeExtensions.shaderPixelLocalStorageCoherentANGLE = rasterOrderGroupsSupported;
// }
}
}
void DisplayMtl::initializeTextureCaps() const

Просмотреть файл

@ -103,7 +103,9 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
gl::ShCompilerInstance *compilerInstance,
ShCompileOptions *options)
{
ContextMtl *contextMtl = mtl::GetImpl(context);
ContextMtl *contextMtl = mtl::GetImpl(context);
DisplayMtl *displayMtl = contextMtl->getDisplay();
options->initializeUninitializedLocals = true;
if (context->isWebGL() && mState.getShaderType() != gl::ShaderType::Compute)
@ -111,7 +113,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->initOutputVariables = true;
}
if (contextMtl->getDisplay()->getFeatures().intelExplicitBoolCastWorkaround.enabled)
if (displayMtl->getFeatures().intelExplicitBoolCastWorkaround.enabled)
{
options->addExplicitBoolCasts = true;
}
@ -121,7 +123,7 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->clampFragDepth = true;
#endif
if (contextMtl->getDisplay()->getFeatures().rewriteRowMajorMatrices.enabled)
if (displayMtl->getFeatures().rewriteRowMajorMatrices.enabled)
{
options->rewriteRowMajorMatrices = true;
}
@ -137,6 +139,13 @@ std::shared_ptr<WaitableCompileEvent> ShaderMtl::compile(const gl::Context *cont
options->metal.defaultUniformsBindingIndex = mtl::kDefaultUniformsBindingIndex;
options->metal.UBOArgumentBufferBindingIndex = mtl::kUBOArgumentBufferBindingIndex;
// GL_ANGLE_shader_pixel_local_storage.
if (displayMtl->getNativeExtensions().shaderPixelLocalStorageANGLE)
{
options->pls.type = displayMtl->getNativePixelLocalStorageType();
options->pls.fragmentSynchronizationType = displayMtl->getPLSSynchronizationType();
}
return compileImplMtl(context, compilerInstance, getState().getSource(), options);
}

Просмотреть файл

@ -1971,7 +1971,7 @@ TEST_P(PixelLocalStorageTest, LeakFramebufferAndTexture)
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
#define PLATFORM(API, BACKEND) API##_##BACKEND()
#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) \
#define PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API, ...) \
ANGLE_INSTANTIATE_TEST( \
TEST, \
PLATFORM(API, D3D11) /* D3D coherent. */ \
@ -2022,8 +2022,14 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(PixelLocalStorageTest);
PLATFORM(API, VULKAN_SWIFTSHADER) /* Test PLS not having access to \
glEnablei/glDisablei/glColorMaski. */ \
.enable(Feature::EmulatePixelLocalStorage) \
.enable(Feature::DisableDrawBuffersIndexed))
PLS_INSTANTIATE_RENDERING_TEST(PixelLocalStorageTest, ES3);
.enable(Feature::DisableDrawBuffersIndexed), \
__VA_ARGS__)
#define PLS_INSTANTIATE_RENDERING_TEST(TEST, API) PLS_INSTANTIATE_RENDERING_TEST_AND(TEST, API)
PLS_INSTANTIATE_RENDERING_TEST_AND(PixelLocalStorageTest,
ES3,
ES3_METAL().enable(Feature::EmulatePixelLocalStorage));
class PixelLocalStorageTestES31 : public PixelLocalStorageTest
{};