Loop count for all trivial form loops. Mem2Reg only when necessary. (#2250)

* Comments and small adjustments

* Deleting loops from scalar evolution correctly

* Better error/warning message
This commit is contained in:
Adam Yang 2019-06-07 18:40:43 -07:00 коммит произвёл GitHub
Родитель f749543a76
Коммит e32833cac7
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 221 добавлений и 25 удалений

Просмотреть файл

@ -261,6 +261,7 @@ void initializeResourceToHandlePass(PassRegistry&);
void initializeSROA_SSAUp_HLSLPass(PassRegistry&);
void initializeHoistConstantArrayPass(PassRegistry&);
void initializeDxilLoopUnrollPass(PassRegistry&);
void initializeDxilConditionalMem2RegPass(PassRegistry&);
void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
// HLSL Change Ends
void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);

Просмотреть файл

@ -134,6 +134,9 @@ void initializeSROA_Parameter_HLSLPass(PassRegistry&);
Pass *createDxilFixConstArrayInitializerPass();
void initializeDxilFixConstArrayInitializerPass(PassRegistry&);
Pass *createDxilConditionalMem2RegPass(bool NoOpt);
void initializeDxilConditionalMem2RegPass(PassRegistry&);
Pass *createDxilLoopUnrollPass(unsigned MaxIterationAttempt);
void initializeDxilLoopUnrollPass(PassRegistry&);
//===----------------------------------------------------------------------===//

Просмотреть файл

@ -87,6 +87,7 @@ HRESULT SetupRegistryPassForHLSL() {
initializeDxilAllocateResourcesForLibPass(Registry);
initializeDxilCleanupAddrSpaceCastPass(Registry);
initializeDxilCondenseResourcesPass(Registry);
initializeDxilConditionalMem2RegPass(Registry);
initializeDxilConvergentClearPass(Registry);
initializeDxilConvergentMarkPass(Registry);
initializeDxilDeadFunctionEliminationPass(Registry);
@ -190,6 +191,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
static const LPCSTR ArgPromotionArgs[] = { "maxElements" };
static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" };
static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "sv-position-index", "num-pixels" };
static const LPCSTR DxilConditionalMem2RegArgs[] = { "NoOpt" };
static const LPCSTR DxilDebugInstrumentationArgs[] = { "UAVSize", "parameter0", "parameter1", "parameter2" };
static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" };
static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" };
@ -223,6 +225,7 @@ static ArrayRef<LPCSTR> GetPassArgNames(LPCSTR passName) {
if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
if (strcmp(passName, "dxil-cond-mem2reg") == 0) return ArrayRef<LPCSTR>(DxilConditionalMem2RegArgs, _countof(DxilConditionalMem2RegArgs));
if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
@ -263,6 +266,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
static const LPCSTR ArgPromotionArgs[] = { "None" };
static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" };
static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None", "None" };
static const LPCSTR DxilConditionalMem2RegArgs[] = { "None" };
static const LPCSTR DxilDebugInstrumentationArgs[] = { "None", "None", "None", "None" };
static const LPCSTR DxilGenerationPassArgs[] = { "None" };
static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" };
@ -296,6 +300,7 @@ static ArrayRef<LPCSTR> GetPassArgDescriptions(LPCSTR passName) {
if (strcmp(passName, "argpromotion") == 0) return ArrayRef<LPCSTR>(ArgPromotionArgs, _countof(ArgPromotionArgs));
if (strcmp(passName, "simplifycfg") == 0) return ArrayRef<LPCSTR>(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs));
if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef<LPCSTR>(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs));
if (strcmp(passName, "dxil-cond-mem2reg") == 0) return ArrayRef<LPCSTR>(DxilConditionalMem2RegArgs, _countof(DxilConditionalMem2RegArgs));
if (strcmp(passName, "hlsl-dxil-debug-instrumentation") == 0) return ArrayRef<LPCSTR>(DxilDebugInstrumentationArgs, _countof(DxilDebugInstrumentationArgs));
if (strcmp(passName, "dxilgen") == 0) return ArrayRef<LPCSTR>(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs));
if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef<LPCSTR>(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs));
@ -340,6 +345,7 @@ static bool IsPassOptionName(StringRef S) {
|| S.equals("InlineThreshold")
|| S.equals("InsertLifetime")
|| S.equals("MaxHeaderSize")
|| S.equals("NoOpt")
|| S.equals("NotOptimized")
|| S.equals("Os")
|| S.equals("ReplaceAllVectors")

Просмотреть файл

@ -252,10 +252,12 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
// Change dynamic indexing vector to array.
MPM.add(createDynamicIndexingVectorToArrayPass(NoOpt));
if (!NoOpt) {
// mem2reg
MPM.add(createPromoteMemoryToRegisterPass());
// Special Mem2Reg pass that only happens if optimization is
// enabled or loop unroll is needed.
MPM.add(createDxilConditionalMem2RegPass(NoOpt));
if (!NoOpt) {
MPM.add(createDxilConvergentMarkPass());
}
@ -269,7 +271,7 @@ static void addHLSLPasses(bool HLSLHighLevel, unsigned OptLevel, hlsl::HLSLExten
// Needs to happen before resources are lowered and before HL
// module is gone.
MPM.add(createLoopRotatePass());
MPM.add(createDxilLoopUnrollPass(/*MaxIterationAttempt*/ 128));
MPM.add(createDxilLoopUnrollPass(1024));
// Default unroll pass. This is purely for optimizing loops without
// attributes.

Просмотреть файл

@ -60,6 +60,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@ -74,6 +75,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/LegacyPassManager.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/HLSL/HLModule.h"
@ -110,7 +112,7 @@ public:
std::unordered_set<Function *> CleanedUpAlloca;
unsigned MaxIterationAttempt = 0;
DxilLoopUnroll(unsigned MaxIterationAttempt = 128) :
DxilLoopUnroll(unsigned MaxIterationAttempt = 1024) :
LoopPass(ID),
MaxIterationAttempt(MaxIterationAttempt)
{
@ -120,16 +122,17 @@ public:
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
}
};
char DxilLoopUnroll::ID;
static void FailLoopUnroll(bool WarnOnly, LLVMContext &Ctx, DebugLoc DL, const char *Message) {
static void FailLoopUnroll(bool WarnOnly, LLVMContext &Ctx, DebugLoc DL, const Twine &Message) {
if (WarnOnly) {
if (DL)
Ctx.emitWarning(hlsl::dxilutil::FormatMessageAtLocation(DL, Message));
@ -684,6 +687,7 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
DebugLoc LoopLoc = L->getStartLoc(); // Debug location for the start of the loop.
Function *F = L->getHeader()->getParent();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
bool HasExplicitLoopCount = false;
int ExplicitUnrollCountSigned = 0;
@ -714,6 +718,18 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
FxcCompatMode = HM.GetHLOptions().bFXCCompatMode;
}
unsigned TripCount = 0;
unsigned TripMultiple = 0;
bool HasTripCount = false;
BasicBlock *ExitingBlock = L->getLoopLatch();
if (!ExitingBlock || !L->isLoopExiting(ExitingBlock))
ExitingBlock = L->getExitingBlock();
if (ExitingBlock) {
TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
HasTripCount = TripMultiple != 1 || TripCount == 1;
}
// Analysis passes
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AssumptionCache *AC =
@ -736,12 +752,6 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
// Promote alloca's
if (!CleanedUpAlloca.count(F)) {
CleanedUpAlloca.insert(F);
Mem2Reg(*F, *DT, *AC);
}
SmallVector<BasicBlock *, 16> ExitBlocks;
L->getExitBlocks(ExitBlocks);
std::unordered_set<BasicBlock *> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
@ -839,9 +849,15 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
SmallVector<std::unique_ptr<LoopIteration>, 16> Iterations; // List of cloned iterations
bool Succeeded = false;
if (HasExplicitLoopCount) {
this->MaxIterationAttempt = std::max(this->MaxIterationAttempt, ExplicitUnrollCount);
// If we were able to figure out the definitive trip count,
// just unroll that many times.
if (HasTripCount) {
this->MaxIterationAttempt = TripCount;
}
else if (HasExplicitLoopCount) {
this->MaxIterationAttempt = ExplicitUnrollCount;
}
for (unsigned IterationI = 0; IterationI < this->MaxIterationAttempt; IterationI++) {
LoopIteration *PrevIteration = nullptr;
@ -957,7 +973,9 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// We've reached the N defined in [unroll(N)]
if (HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) {
if ((HasExplicitLoopCount && IterationI+1 >= ExplicitUnrollCount) ||
(HasTripCount && IterationI+1 >= TripCount))
{
Succeeded = true;
BranchInst *BI = cast<BranchInst>(CurIteration.Latch->getTerminator());
@ -1024,6 +1042,8 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
}
SE->forgetLoop(L);
// Remove the original blocks that we've cloned from all loops.
for (BasicBlock *BB : ToBeCloned)
LI->removeBlock(BB);
@ -1061,9 +1081,16 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we were unsuccessful in unrolling the loop
else {
FailLoopUnroll(FxcCompatMode /*warn only*/, F->getContext(), LoopLoc,
const char *Msg =
"Could not unroll loop. Loop bound could not be deduced at compile time. "
"To give an explicit unroll bound, use unroll(n).");
"Use [unroll(n)] to give an explicit count.";
if (FxcCompatMode) {
FailLoopUnroll(true /*warn only*/, F->getContext(), LoopLoc, Msg);
}
else {
FailLoopUnroll(false /*warn only*/, F->getContext(), LoopLoc,
Twine(Msg) + Twine(" Use '-HV 2016' to treat this as warning."));
}
// Remove all the cloned blocks
for (std::unique_ptr<LoopIteration> &Ptr : Iterations) {
@ -1088,8 +1115,88 @@ bool DxilLoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Special Mem2Reg pass
//
// In order to figure out loop bounds to unroll, we must first run mem2reg pass
// on the function, but we don't want to run mem2reg on functions that don't
// have to be unrolled when /Od is given. This pass considers all these
// conditions and runs mem2reg on functions only when needed.
//
class DxilConditionalMem2Reg : public FunctionPass {
public:
static char ID;
// Function overrides that resolve options when used for DxOpt
void applyOptions(PassOptions O) {
GetPassOptionBool(O, "NoOpt", &NoOpt, false);
}
void dumpConfig(raw_ostream &OS) {
FunctionPass::dumpConfig(OS);
OS << ",NoOpt=" << NoOpt;
}
bool NoOpt = false;
explicit DxilConditionalMem2Reg(bool NoOpt=false) : FunctionPass(ID), NoOpt(NoOpt)
{
initializeDxilConditionalMem2RegPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.setPreservesCFG();
}
// Recursively find loops that are marked with [unroll]
static bool HasLoopsMarkedUnrollRecursive(Loop *L) {
int Count = 0;
if (IsMarkedFullUnroll(L) || IsMarkedUnrollCount(L, &Count)) {
return true;
}
for (Loop *ChildLoop : *L) {
if (HasLoopsMarkedUnrollRecursive(ChildLoop))
return true;
}
return false;
}
bool runOnFunction(Function &F) {
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AssumptionCache *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
bool NeedPromote = false;
bool Changed = false;
if (NoOpt) {
// If any of the functions are marked as full unroll.
for (Loop *L : *LI) {
if (HasLoopsMarkedUnrollRecursive(L)) {
NeedPromote = true;
break;
}
}
}
else {
NeedPromote = true;
}
if (NeedPromote)
Changed |= Mem2Reg(F, *DT, *AC);
return Changed;
}
};
// Out-of-line definition of the static ID member declared in the class; the
// constructor hands it to the FunctionPass base.
char DxilConditionalMem2Reg::ID;
// Factory for the conditional mem2reg pass. NoOpt is forwarded unchanged to
// the DxilConditionalMem2Reg constructor; the result is a freshly
// heap-allocated pass object.
Pass *llvm::createDxilConditionalMem2RegPass(bool NoOpt) {
  Pass *NewPass = new DxilConditionalMem2Reg(NoOpt);
  return NewPass;
}
// Factory for the DXIL loop unroll pass. MaxIterationAttempt is forwarded
// unchanged to the DxilLoopUnroll constructor; the result is a freshly
// heap-allocated pass object.
Pass *llvm::createDxilLoopUnrollPass(unsigned MaxIterationAttempt) {
  Pass *NewPass = new DxilLoopUnroll(MaxIterationAttempt);
  return NewPass;
}
// Pass registration for the two passes defined in this file. The first
// string is the pass's short name and the second a human-readable
// description.
// NOTE(review): the trailing false/false arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS - confirm against
// PassSupport.h.
INITIALIZE_PASS(DxilConditionalMem2Reg, "dxil-cond-mem2reg", "Dxil Conditional Mem2Reg", false, false)
INITIALIZE_PASS(DxilLoopUnroll, "dxil-loop-unroll", "Dxil Unroll loops", false, false)

Просмотреть файл

@ -1,4 +1,4 @@
// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s | XFail GitHub #2080
// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
// Test that precise modifier on a matrix has an effect.

Просмотреть файл

@ -0,0 +1,38 @@
// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
// CHECK: @main
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK-NOT: @dx.op.unary.f32(i32 13
// Confirm that a loop whose induction variable advances by more than 1 per
// iteration can still be fully unrolled.
[RootSignature("")]
float main(float y : Y) : SV_Target {
float x = 0;
static const uint kLoopCount = 512;
// i takes the values 0, 32, ..., 480, so the body runs 512 / 32 = 16 times.
// That matches the 16 expected matches listed at the top of this file.
[unroll]
for (uint i = 0; i < kLoopCount; i += 32) {
// Each iteration depends on the previous x, so every sin call must survive.
x = sin(x * x + y);
}
return x;
}

Просмотреть файл

@ -0,0 +1,33 @@
// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
// CHECK: @main
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK: @dx.op.unary.f32(i32 13
// CHECK-NOT: @dx.op.unary.f32(i32 13
// Confirm that a loop with a compound exit condition, whose counters are
// also modified inside the body, can still be unrolled.
[RootSignature("")]
float main(float y : Y) : SV_Target {
float x = 0;
static const uint kLoopCount = 512;
int j = 10;
// Per iteration i nets +8 (-8 in the body, +16 in the increment) and j drops
// by 1. j goes 10 -> 2 after 8 iterations, at which point `j > 2` fails while
// i is only 64, so the body runs exactly 8 times. That matches the 8 expected
// matches listed at the top of this file.
[unroll]
for (uint i = 0; i < kLoopCount && j > 2; i += 16) {
x = sin(x * x + y);
// i is unsigned: on the first iteration this wraps below zero, and the
// `i += 16` in the loop header wraps it back around to 8.
i -= 8;
j -= 1;
}
return x;
}

Просмотреть файл

@ -1,5 +1,6 @@
// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
// CHECK-DAG: Could not unroll loop.
// CHECK-DAG: -HV 2016
// CHECK-NOT: @main
// Check that the compilation fails due to unable to

Просмотреть файл

@ -1,7 +1,7 @@
// RUN: %dxc -Od -E main -T ps_6_0 %s | FileCheck %s
// CHECK: Could not unroll loop
// CHECK: To give an explicit unroll bound, use unroll(n)
// CHECK-NOT: @main
// CHECK: @main
// Confirm that simple loops should be able to be unrolled
[RootSignature("")]
float main(float y : Y) : SV_Target {

Просмотреть файл

@ -1,7 +1,9 @@
// RUN: %dxc -HV 2016 -Od -E main -T ps_6_0 %s | FileCheck %s
// RUN: %dxc /HV 2016 -Od -E main -T ps_6_0 %s | FileCheck %s
// CHECK-DAG: warning: Could not unroll loop.
// CHECK-NOT: -HV 2016
// CHECK-NOT: @main
// Check that the warning doesn't mention HV 2016
// Check that the compilation fails due to unable to
// find the loop bound.

Просмотреть файл

@ -1525,6 +1525,9 @@ class db_dxil(object):
{'n':'force-ssa-updater', 'i':'ForceSSAUpdater', 't':'bool', 'd':'Force the pass to not use DomTree and mem2reg, insteadforming SSA values through the SSAUpdater infrastructure.'},
{'n':'sroa-random-shuffle-slices', 'i':'SROARandomShuffleSlices', 't':'bool', 'd':'Enable randomly shuffling the slices to help uncover instability in their order.'},
{'n':'sroa-strict-inbounds', 'i':'SROAStrictInbounds', 't':'bool', 'd':'Experiment with completely strict handling of inbounds GEPs.'}])
add_pass("dxil-cond-mem2reg", "DxilConditionalMem2Reg", "Dxil Conditional Mem2Reg", [
{'n':'NoOpt', 't':'bool', 'c':1},
])
add_pass('scalarrepl', 'SROA_DT', 'Scalar Replacement of Aggregates (DT)', [
{'n':'Threshold', 't':'int', 'c':1},
{'n':'StructMemberThreshold', 't':'int', 'c':1},