Use the AST analyses to narrow the usage of [[loop]] and [[flatten]]

These attributes are now used exactly in the loops and ifs
that require them, limiting the number of failed compilations
due to excessive unrolling and flattening.
Also output Lod0 functions only when needed.

Adds unit tests for LOOP, FLATTEN and Lod0 generation.

The patch was tested against the WebGL CTS 1.0.4 for which all the
failures existed prior to this patch and seem to be unrelated to this
change. It also works correctly on the following sites that had trouble
with [[loop]] and [[flatten]]:
 * dev.miaumiau.cat/rayTracer "Skull Demo"
 * The turbulenz engine particle demo
 * Lots of ShaderToy samples (including "Volcanic" and "Metropolis")
 * Google Maps Earth mode
 * Lots of Chrome Experiments
 * Lagoa
 * madebyevan.com/webgl-water
 * SketchFab
 * Unit Tests

BUG=angleproject:937
BUG=395048

Change-Id: I856de9025f10b79781929ec212dbffc2064a940e
Reviewed-on: https://chromium-review.googlesource.com/264791
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Tested-by: Corentin Wallez <cwallez@chromium.org>
This commit is contained in:
Corentin Wallez 2015-03-19 14:38:02 -07:00
Родитель 794e0009ef
Коммит 1239ee94fb
4 изменённых файлов: 272 добавлений и 38 удалений

Просмотреть файл

@ -14,7 +14,6 @@
#include "common/utilities.h"
#include "compiler/translator/BuiltInFunctionEmulator.h"
#include "compiler/translator/BuiltInFunctionEmulatorHLSL.h"
#include "compiler/translator/DetectDiscontinuity.h"
#include "compiler/translator/FlagStd140Structs.h"
#include "compiler/translator/InfoSink.h"
#include "compiler/translator/NodeSearch.h"
@ -109,7 +108,8 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion,
mSourcePath(sourcePath),
mOutputType(outputType),
mNumRenderTargets(numRenderTargets),
mCompileOptions(compileOptions)
mCompileOptions(compileOptions),
mCurrentFunctionMetadata(nullptr)
{
mUnfoldShortCircuit = new UnfoldShortCircuit(this);
mInsideFunction = false;
@ -130,8 +130,6 @@ OutputHLSL::OutputHLSL(sh::GLenum shaderType, int shaderVersion,
mUniqueIndex = 0;
mContainsLoopDiscontinuity = false;
mContainsAnyLoop = false;
mOutputLod0Function = false;
mInsideDiscontinuousLoop = false;
mNestedLoopDepth = 0;
@ -170,9 +168,6 @@ OutputHLSL::~OutputHLSL()
void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
{
mContainsLoopDiscontinuity = mShaderType == GL_FRAGMENT_SHADER && containsLoopDiscontinuity(treeRoot);
mContainsAnyLoop = containsAnyLoop(treeRoot);
const std::vector<TIntermTyped*> &flaggedStructs = FlagStd140ValueStructs(treeRoot);
makeFlaggedStructMaps(flaggedStructs);
@ -188,12 +183,9 @@ void OutputHLSL::output(TIntermNode *treeRoot, TInfoSinkBase &objSink)
builtInFunctionEmulator.MarkBuiltInFunctionsForEmulation(treeRoot);
// Now that we are done changing the AST, do the analyses needed for HLSL generation
{
CallDAG dag;
CallDAG::InitResult success = dag.init(treeRoot, &objSink);
ASSERT(success == CallDAG::INITDAG_SUCCESS);
mASTAnalyses = CreateASTMetadataHLSL(treeRoot, dag);
}
CallDAG::InitResult success = mCallDag.init(treeRoot, &objSink);
ASSERT(success == CallDAG::INITDAG_SUCCESS);
mASTMetadataList = CreateASTMetadataHLSL(treeRoot, mCallDag);
// Output the body and footer first to determine what has to go in the header
mInfoSinkStack.push(&mBody);
@ -1963,6 +1955,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpPrototype:
if (visit == PreVisit)
{
size_t index = mCallDag.findIndex(node);
// Skip the prototype if it is not implemented (and thus not used)
if (index == CallDAG::InvalidIndex)
{
return false;
}
out << TypeString(node->getType()) << " " << Decorate(TFunction::unmangleName(node->getName())) << (mOutputLod0Function ? "Lod0(" : "(");
TIntermSequence *arguments = node->getSequence();
@ -1986,7 +1985,8 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
out << ");\n";
// Also prototype the Lod0 variant if needed
if (mContainsLoopDiscontinuity && !mOutputLod0Function)
bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
{
mOutputLod0Function = true;
node->traverse(this);
@ -1999,8 +1999,13 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpComma: outputTriplet(visit, "(", ", ", ")"); break;
case EOpFunction:
{
ASSERT(mCurrentFunctionMetadata == nullptr);
TString name = TFunction::unmangleName(node->getName());
size_t index = mCallDag.findIndex(node);
ASSERT(index != CallDAG::InvalidIndex);
mCurrentFunctionMetadata = &mASTMetadataList[index];
out << TypeString(node->getType()) << " ";
if (name == "main")
@ -2050,14 +2055,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
out << "}\n";
if (mContainsLoopDiscontinuity && !mOutputLod0Function)
mCurrentFunctionMetadata = nullptr;
bool needsLod0 = mASTMetadataList[index].mNeedsLod0;
if (needsLod0 && !mOutputLod0Function && mShaderType == GL_FRAGMENT_SHADER)
{
if (name != "main")
{
mOutputLod0Function = true;
node->traverse(this);
mOutputLod0Function = false;
}
ASSERT(name != "main");
mOutputLod0Function = true;
node->traverse(this);
mOutputLod0Function = false;
}
return false;
@ -2066,11 +2072,15 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpFunctionCall:
{
TString name = TFunction::unmangleName(node->getName());
bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
TIntermSequence *arguments = node->getSequence();
bool lod0 = mInsideDiscontinuousLoop || mOutputLod0Function;
if (node->isUserDefined())
{
size_t index = mCallDag.findIndex(node);
ASSERT(index != CallDAG::InvalidIndex);
lod0 &= mASTMetadataList[index].mNeedsLod0;
out << Decorate(name) << (lod0 ? "Lod0(" : "(");
}
else
@ -2301,11 +2311,10 @@ bool OutputHLSL::visitSelection(Visit visit, TIntermSelection *node)
{
mUnfoldShortCircuit->traverse(node->getCondition());
// D3D errors when there is a gradient operation in a loop in an unflattened if
// however flattening all the ifs in branch heavy shaders made D3D error too.
// As a temporary workaround we flatten the ifs only if there is at least a loop
// present somewhere in the shader.
if (mShaderType == GL_FRAGMENT_SHADER && mContainsAnyLoop)
// D3D errors when there is a gradient operation in a loop in an unflattened if.
if (mShaderType == GL_FRAGMENT_SHADER
&& mCurrentFunctionMetadata->hasDiscontinuousLoop(node)
&& mCurrentFunctionMetadata->hasGradientInCallGraph(node))
{
out << "FLATTEN ";
}
@ -2400,11 +2409,8 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node)
mNestedLoopDepth++;
bool wasDiscontinuous = mInsideDiscontinuousLoop;
if (mContainsLoopDiscontinuity && !mInsideDiscontinuousLoop)
{
mInsideDiscontinuousLoop = containsLoopDiscontinuity(node);
}
// Stay "inside a discontinuous loop" if an enclosing loop already was one, or if the
// current function's metadata lists this loop as discontinuous.
// The membership test must be "> 0": count() yields an unsigned value, so ">= 0" is
// always true and would flag every loop as discontinuous.
mInsideDiscontinuousLoop = mInsideDiscontinuousLoop ||
mCurrentFunctionMetadata->mDiscontinuousLoops.count(node) > 0;
if (mOutputType == SH_HLSL9_OUTPUT)
{
@ -2419,16 +2425,17 @@ bool OutputHLSL::visitLoop(Visit visit, TIntermLoop *node)
TInfoSinkBase &out = getInfoSink();
const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
if (node->getType() == ELoopDoWhile)
{
out << "{LOOP do\n";
out << "{" << unroll << " do\n";
outputLineDirective(node->getLine().first_line);
out << "{\n";
}
else
{
out << "{LOOP for(";
out << "{" << unroll << " for(";
if (node->getInit())
{
@ -2734,8 +2741,9 @@ bool OutputHLSL::handleExcessiveLoop(TIntermLoop *node)
}
// for(int index = initial; index < clampedLimit; index += increment)
const char *unroll = mCurrentFunctionMetadata->hasGradientInCallGraph(node) ? "LOOP" : "";
out << "LOOP for(";
out << unroll << " for(";
index->traverse(this);
out << " = ";
out << initial;

Просмотреть файл

@ -170,9 +170,9 @@ class OutputHLSL : public TIntermTraverser
int mUniqueIndex; // For creating unique names
std::vector<ASTMetadataHLSL> mASTAnalyses;
bool mContainsLoopDiscontinuity;
bool mContainsAnyLoop;
CallDAG mCallDag;
MetadataList mASTMetadataList;
ASTMetadataHLSL *mCurrentFunctionMetadata;
bool mOutputLod0Function;
bool mInsideDiscontinuousLoop;
int mNestedLoopDepth;

Просмотреть файл

@ -98,5 +98,13 @@
},
},
}],
['OS=="win"',
{
# TODO(cwallez): make this angle_enable_hlsl instead (requires gyp file refactoring)
'sources':
[
'<(angle_path)/src/tests/compiler_tests/UnrollFlatten_test.cpp',
],
}],
],
}

Просмотреть файл

@ -0,0 +1,218 @@
//
// Copyright (c) 2015 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// UnrollFlatten_test.cpp:
// Test for the outputting of LOOP and FLATTEN for the D3D compiler
//
#include "angle_gl.h"
#include "gtest/gtest.h"
#include "GLSLANG/ShaderLang.h"
#include "compiler/translator/TranslatorHLSL.h"
namespace
{
// Fixture that translates a fragment shader with TranslatorHLSL and then checks that a
// list of patterns appears, in order, in the translated source, with no unexpected
// LOOP / FLATTEN occurrence in between two consecutive patterns.
class UnrollFlattenTest : public testing::Test
{
  public:
    UnrollFlattenTest() : mTranslator(nullptr), mCurrentPosition(0) {}

  protected:
    // Creates and initializes the HLSL translator used by compile().
    void SetUp() override
    {
        ShBuiltInResources resources;
        ShInitBuiltInResources(&resources);
        resources.FragmentPrecisionHigh = 1;

        mTranslator = new TranslatorHLSL(GL_FRAGMENT_SHADER, SH_GLES2_SPEC, SH_HLSL11_OUTPUT);
        ASSERT_TRUE(mTranslator->Init(resources));
    }

    void TearDown() override
    {
        SafeDelete(mTranslator);
    }

    // Translates shaderString, stores the translated source and positions the search
    // cursor on the "GL_USES_FRAG_COLOR" marker. Reports a test failure if the
    // translation fails or if the marker is absent.
    void compile(const std::string &shaderString)
    {
        const char *shaderStrings[] = { shaderString.c_str() };
        bool compilationSuccess = mTranslator->compile(shaderStrings, 1, SH_VARIABLES | SH_OBJECT_CODE);
        TInfoSink &infoSink = mTranslator->getInfoSink();
        if (!compilationSuccess)
        {
            FAIL() << "Shader compilation failed " << infoSink.info.str();
        }
        mTranslatedSource = infoSink.obj.str();

        // Start from a clean expectation history for the newly translated source.
        mExpectationList.clear();

        // Ignore the beginning of the shader to avoid the definitions of LOOP and FLATTEN
        mCurrentPosition = mTranslatedSource.find("GL_USES_FRAG_COLOR");
        if (mCurrentPosition == std::string::npos)
        {
            // Without the marker every later search would start at npos and fail with a
            // misleading "Couldn't find" message, so report the real cause here.
            FAIL() << "Couldn't find 'GL_USES_FRAG_COLOR' in translated source:\n" << mTranslatedSource;
        }
    }

    // Checks that the `count` entries of `patterns` are found one after the other in
    // the translated source, starting at the current cursor. Between the cursor and
    // each match, no LOOP or FLATTEN may appear unless it is the pattern being matched.
    // The cursor is advanced just past the start of each match.
    void expect(const char *patterns[], size_t count)
    {
        const char *badPatterns[] = { UNROLL, FLATTEN };

        for (size_t i = 0; i < count; i++)
        {
            const char *pattern = patterns[i];
            auto position = mTranslatedSource.find(pattern, mCurrentPosition);
            if (position == std::string::npos)
            {
                FAIL() << "Couldn't find '" << pattern << "' after expectations '"
                       << mExpectationList << "' in translated source:\n" << mTranslatedSource;
            }

            for (size_t j = 0; j < ArraySize(badPatterns); j++)
            {
                const char *badPattern = badPatterns[j];
                // Compare the text rather than the pointers: whether two identical
                // string literals share an address is up to the compiler.
                if (std::string(pattern) != badPattern &&
                    mTranslatedSource.find(badPattern, mCurrentPosition) < position)
                {
                    FAIL() << "Found '" << badPattern << "' before '" << pattern << "' after expectations '"
                           << mExpectationList << "' in translated source:\n" << mTranslatedSource;
                }
            }

            mExpectationList += " - " + std::string(pattern);
            mCurrentPosition = position + 1;
        }
    }

    // Markers that must not show up unexpectedly in the translated source.
    static const char *UNROLL;
    static const char *FLATTEN;

  private:
    TranslatorHLSL *mTranslator;
    std::string mTranslatedSource;

    // Search cursor into mTranslatedSource; size_t because it holds std::string::find
    // results (including npos), which do not fit losslessly in an int.
    size_t mCurrentPosition;

    // Human-readable trail of the patterns matched so far, used in failure messages.
    std::string mExpectationList;
};

const char *UnrollFlattenTest::UNROLL = "LOOP";
const char *UnrollFlattenTest::FLATTEN = "FLATTEN";
// Without any gradient operation, neither LOOP nor FLATTEN nor Lod0 variants are
// expected, even though the shader has ifs and a discontinuous loop.
TEST_F(UnrollFlattenTest, NoGradient)
{
    // The trailing numbers refer to the notes listed before the expectations.
    const std::string fragmentShader =
        "precision mediump float;\n"
        "uniform float f;\n"
        "float fun(float a){\n" // 1
        "    if (a > 1.0) {return f;}\n" // 2
        "    else {return a + 1.0;}\n"
        "}\n"
        "float fun2(float a){\n" // 3
        "    for (int i = 0; i < 10; i++) {\n" // 4
        "        if (a > 1.0) {break;}\n" // 5
        "        a = fun(a);\n" // 6
        "    }\n"
        "    return a;\n"
        "}\n"
        "void main() {\n"
        "    float accum = 0.0;\n"
        "    if (f < 5.0) {accum = fun2(accum);}\n" // 7
        "    gl_FragColor = vec4(accum);\n"
        "}\n";
    compile(fragmentShader);

    // Notes:
    // 1 - no Lod0 variant of fun is expected
    // 2 - no FLATTEN: the if does not contain a discontinuous loop
    // 3 - no Lod0 variant of fun2 is expected
    // 4 - no LOOP: the loop is discontinuous, and there is no gradient anyway
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the regular (non-Lod0) fun is called
    // 7 - no FLATTEN
    const char *expectedPatterns[] =
    {
        "fun(", "if",
        "fun2(", "for", "if", "break", "fun(",
        "main(", "if", "fun2("
    };
    expect(expectedPatterns, ArraySize(expectedPatterns));
}
// A gradient inside a loop that is NOT discontinuous: the regular (non-Lod0) functions
// must be used, and the loop containing the gradient must be marked with LOOP.
TEST_F(UnrollFlattenTest, GradientNotInDiscont)
{
    // The trailing numbers refer to the notes listed before the expectations.
    const std::string fragmentShader =
        "precision mediump float;\n"
        "uniform float f;\n"
        "uniform sampler2D tex;"
        "float fun(float a){\n" // 1
        "    return texture2D(tex, vec2(0.5, f)).x;\n" // 2
        "}\n"
        "float fun2(float a){\n" // 3
        "    for (int i = 0; i < 10; i++) {\n" // 4
        "        if (a > 1.0) {}\n" // 5
        "        a = fun(a);\n" // 6
        "    }\n"
        "    return a;\n"
        "}\n"
        "void main() {\n"
        "    float accum = 0.0;\n"
        "    if (f < 5.0) {accum = fun2(accum);}\n" // 7
        "    gl_FragColor = vec4(accum);\n"
        "}\n";
    compile(fragmentShader);

    // Notes:
    // 1 - no Lod0 variant of fun is expected
    // 2 - the regular texture2D is used, no Lod0 variant
    // 3 - no Lod0 variant of fun2 is expected (it is not called from a discontinuous loop)
    // 4 - LOOP is expected because the loop contains a gradient operation
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the regular (non-Lod0) fun is called
    // 7 - no FLATTEN
    const char *expectedPatterns[] =
    {
        "fun(", "texture2D(",
        "fun2(", "LOOP", "for", "if", "fun(",
        "main(", "if", "fun2("
    };
    expect(expectedPatterns, ArraySize(expectedPatterns));
}
// A gradient inside a discontinuous loop: the Lod0 variants of the functions must be
// generated and called from that loop.
TEST_F(UnrollFlattenTest, GradientInDiscont)
{
    // The trailing numbers refer to the notes listed before the expectations.
    const std::string fragmentShader =
        "precision mediump float;\n"
        "uniform float f;\n"
        "uniform sampler2D tex;"
        "float fun(float a){\n" // 1
        "    return texture2D(tex, vec2(0.5, f)).x;\n" // 2
        "}\n"
        "float fun2(float a){\n" // 3
        "    for (int i = 0; i < 10; i++) {\n" // 4
        "        if (a > 1.0) {break;}\n" // 5
        "        a = fun(a);\n" // 6
        "    }\n"
        "    return a;\n"
        "}\n"
        "void main() {\n"
        "    float accum = 0.0;\n"
        "    if (f < 5.0) {accum = fun2(accum);}\n" // 7
        "    gl_FragColor = vec4(accum);\n"
        "}\n";
    compile(fragmentShader);

    // Notes:
    // 1 - a Lod0 variant of fun is expected (gradient + discontinuous loop)
    // 2 - texture2DLod0 is used inside funLod0
    // 3 - no Lod0 variant of fun2 is expected (it is not called from a discontinuous loop)
    // 4 - LOOP is expected because the loop contains a gradient operation (even if Lod0)
    // 5 - no FLATTEN: the if does not contain a discontinuous loop
    // 6 - the Lod0 variant of fun is called
    // 7 - FLATTEN is expected: the if contains a discontinuous loop and a gradient
    const char *expectedPatterns[] =
    {
        "fun(", "texture2D(",
        "funLod0(", "texture2DLod0(",
        "fun2(", "LOOP", "for", "if", "break", "funLod0(",
        "main(", "FLATTEN", "if", "fun2("
    };
    expect(expectedPatterns, ArraySize(expectedPatterns));
}
}