diff --git a/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp b/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp index 43d513bcc..2efd25c1a 100644 --- a/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp +++ b/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp @@ -130,10 +130,12 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -456,6 +458,79 @@ static SmallVector CollectExitValues(Value *new_exit_cond, return exit_values; } +// Ensures the branch from exiting_block to outside L escapes exactly one +// level of loop nesting, and does not immediately jump into an otherwise +// unrelated loop. Creates a downstream block as needed. If the exiting edge is +// critical, it will be split. Updates dominator tree and loop info. Returns +// true if any changes were made. +static bool EnsureSingleLevelExit(Loop *L, LoopInfo *LI, DominatorTree *DT, + BasicBlock *exiting_block) { + BasicBlock *exit_block = GetExitBlockForExitingBlock(L, exiting_block); + + Loop *exit_loop = LI->getLoopFor(exit_block); + assert(L != exit_loop); + + Loop *parent_loop = L->getParentLoop(); + if (parent_loop != exit_loop) { + // Split the edge between the blocks, returning the newly created block. + BasicBlock *new_bb = SplitEdge(exiting_block, exit_block, DT, LI); + // The new block might be in the middle or at the end. + BasicBlock *middle_bb; + if (new_bb->getSingleSuccessor() == exit_block) { + middle_bb = new_bb; + } else { + middle_bb = exit_block; + exit_block = new_bb; + } + + // What loop does middle_bb end up in? SplitEdge has these cases: + // If the edge was critical: + // if source block is not in a loop: ruled out already + // if dest block is not in a loop --> not in any loop. + // if going from outer loop to inner loop: ruled out already + // if going from inner loop to outer loop --> outer loop + // if loops unrelated by containment -> the parent loop of the + // destination block (which must be a loop header because we + // assume irreducible loops). + // If the edge was non-critcial: + // If the exit block only had one incominge edge --> same loop as + // destination block. + // otherwise the exiting block had a single successor. + // This is ruled out because the the exiting block ends with a + // conditional branch, and so has two successors. + + // Move the middle_block to the parent loop, if it exists. + // If all goes well, the latch exit block will branch to it. + // If the algorithm bails early, then there is no harm in putting + // it in L's parent loop. At worst it will be an exiting block for + // the parent loop. + LI->removeBlock(middle_bb); + if (parent_loop) { + parent_loop->addBasicBlockToLoop(middle_bb, *LI); + + // middle_bb block is now an exiting block, going from parent_loop to + // exit_loop, which we know are different. Make sure it ends in a + // in a conditional branch, as expected by the rest of the algorithm. + auto *br = cast(middle_bb->getTerminator()); + assert(!br->isConditional()); + auto *true_val = ConstantInt::getTrue(br->getContext()); + br->eraseFromParent(); + BasicBlock *parent_latch = parent_loop->getLoopLatch(); + BranchInst::Create(exit_block, parent_latch, true_val, middle_bb); + // Fix phis in parent_latch + for (Instruction &inst : *parent_latch) { + PHINode *phi = dyn_cast(&inst); + if (!phi) + break; + // We don't care about the values. The path is never taken. + phi->addIncoming(GetDefaultValue(phi->getType()), middle_bb); + } + } + return true; + } + return false; +} + // Restructures exiting_block so its work, including its exit branch, is moved // to a block B that dominates the latch block. Let's call B the // newly-exiting-block. @@ -465,6 +540,15 @@ static bool RemoveUnstructuredLoopExitsIteration(BasicBlock *exiting_block, Loop *L, LoopInfo *LI, DominatorTree *DT) { BasicBlock *latch = L->getLoopLatch(); + + if (EnsureSingleLevelExit(L, LI, DT, latch)) { + // Exit early so we're forced to recompute exit blocks. + return true; + } + if (EnsureSingleLevelExit(L, LI, DT, exiting_block)) { + return true; + } + BasicBlock *latch_exit = GetExitBlockForExitingBlock(L, latch); BasicBlock *exit_block = GetExitBlockForExitingBlock(L, exiting_block); diff --git a/test/HLSL/passes/dxil_remove_unstructured_loop_exits/exit-to-unrelated-loop.ll b/test/HLSL/passes/dxil_remove_unstructured_loop_exits/exit-to-unrelated-loop.ll new file mode 100644 index 000000000..636b9d4cf --- /dev/null +++ b/test/HLSL/passes/dxil_remove_unstructured_loop_exits/exit-to-unrelated-loop.ll @@ -0,0 +1,93 @@ +; RUN: opt %s -analyze -loops | FileCheck -check-prefix=LOOPBEFORE %s +; RUN: opt %s -dxil-remove-unstructured-loop-exits -o %t.bc +; RUN: opt %t.bc -S | FileCheck %s +; RUN: opt %t.bc -analyze -loops | FileCheck -check-prefix=LOOPAFTER %s + +; The exiting edge goes to the header of a completely unrelated loop. +; That edge is 'critical' in CFG terms, and will be split before attempting +; to restructure the exit. + + +; entry +; | +---------+ +; v v | +; header.1 --> if.1 -----> header.u2 -+ +; ^ | | +; | | | +; | +-------- endif.1 end.u2 +; | | +; | v +; latch.1 +; | +; v +; end +; + +; LOOPBEFORE-DAG: Loop at depth 1 containing: %header.u2
+; LOOPBEFORE-DAG: Loop at depth 1 containing: %header.1
,%if.1,%endif.1,%latch.1 +; LOOPBEFORE-NOT: Loop at depth + +; LOOPAFTER-DAG: Loop at depth 1 containing: %header.u2
+; LOOPAFTER-DAG: Loop at depth 1 containing: %header.1
,%if.1,%endif.1,%latch.1 +; LOOPAFTER-NOT: Loop at depth + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + + +define void @main(i1 %cond) { +entry: + br label %header.1 + +header.1: + br label %if.1 + +if.1: + br i1 %cond, label %header.u2, label %endif.1 + +endif.1: + br label %latch.1 + +latch.1: + br i1 %cond, label %end, label %header.1 + +end: + ret void + +header.u2: + br i1 %cond, label %end.u2, label %header.u2 + +end.u2: + ret void +} + + +; CHECK: define void @main +; CHECK: entry: +; CHECK: br label %header.1 + +; CHECK: header.1: +; CHECK: br label %if.1 + +; CHECK: if.1: +; CHECK: br i1 %cond, label %if.1.header.u2_crit_edge, label %endif.1 + +; CHECK: if.1.header.u2_crit_edge: +; CHECK: br label %header.u2 + +; CHECK: endif.1: +; CHECK: br label %latch.1 + +; CHECK: latch.1: +; CHECK: br i1 %cond, label %end, label %header.1 + +; CHECK: end: +; CHECK: ret void + +; CHECK: header.u2: +; CHECK: br i1 %cond, label %end.u2, label %header.u2 + +; CHECK: end.u2: +; CHECK: ret void +; CHECK: } diff --git a/test/HLSL/passes/dxil_remove_unstructured_loop_exits/multi-level-exit-from-latch.ll b/test/HLSL/passes/dxil_remove_unstructured_loop_exits/multi-level-exit-from-latch.ll new file mode 100644 index 000000000..d265e160f --- /dev/null +++ b/test/HLSL/passes/dxil_remove_unstructured_loop_exits/multi-level-exit-from-latch.ll @@ -0,0 +1,166 @@ +; RUN: opt %s -analyze -loops | FileCheck -check-prefix=LOOPBEFORE %s +; RUN: opt %s -dxil-remove-unstructured-loop-exits -o %t.bc +; RUN: opt %t.bc -S | FileCheck %s +; RUN: opt %t.bc -analyze -loops | FileCheck -check-prefix=LOOPAFTER %s + +; The exiting edge from the latch block of the loop at depth 3 exits to the loop at depth 1. +; This reproduces the original bug. +; +; Loop exits are 'dedicated', one of the LoopSimplifyForm criteria. + +; +; entry +; | +; v +; header.1 --> header.2 --> header.3 --> if.3 -----> exiting.3 +; ^ ^ ^ | | | +; | | | v | | +; | | latch.3 <--- endif.3 <----+ | +; | | | | +; | | | v +; | latch.2 <----------------------------- exit.3.to.2 +; | | | +; | +-------- latch.2.exit | +; | | | +; | | v +; | | latch.3.exit +; | | | +; | v | +; latch.1 <-----------------+ +; | +; v +; end +; + + +; LOOPBEFORE: Loop at depth 1 containing: %header.1
,%header.2,%header.3,%if.3,%exiting.3,%endif.3,%latch.3,%latch.3.exit,%exit.3.to.2,%latch.2,%latch.2.exit,%latch.1 +; LOOPBEFORE-NEXT: Loop at depth 2 containing: %header.2
,%header.3,%if.3,%exiting.3,%endif.3,%latch.3,%exit.3.to.2,%latch.2 +; LOOPBEFORE-NEXT: Loop at depth 3 containing: %header.3
,%if.3,%exiting.3,%endif.3,%latch.3 +; no more loops expected +; LOOPBEFORE-NOT: Loop at depth + +; LOOPAFTER: Loop at depth 1 containing: %header.1
,%header.2,%header.3,%if.3,%exiting.3,%dx.struct_exit.new_exiting,%endif.3,%latch.3,%latch.3.exit,%0,%exit.3.to.2,%dx.struct_exit.new_exiting4,%latch.2,%latch.2.exit,%1,%latch.3.exit.split,%latch.1 +; LOOPAFTER-NEXT: Loop at depth 2 containing: %header.2
,%header.3,%if.3,%exiting.3,%dx.struct_exit.new_exiting,%endif.3,%latch.3,%latch.3.exit,%0,%exit.3.to.2,%dx.struct_exit.new_exiting4,%latch.2 +; LOOPAFTER-NEXT: Loop at depth 3 containing: %header.3
,%if.3,%exiting.3,%dx.struct_exit.new_exiting,%endif.3,%latch.3 +; no more loops expected +; LOOPAFTER-NOT: Loop at depth + + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + + +define void @main(i1 %cond) { +entry: + br label %header.1 + +header.1: + br label %header.2 + +header.2: + br label %header.3 + +header.3: + br label %if.3 + +if.3: + br i1 %cond, label %exiting.3, label %endif.3 + +exiting.3: + %x3val = add i32 0, 0 + br i1 %cond, label %exit.3.to.2, label %endif.3 + +endif.3: + br label %latch.3 + +latch.3: + br i1 %cond, label %latch.3.exit, label %header.3 + +latch.3.exit: + br label %latch.1 + +latch.2: + %l2val = phi i32 [ %x3val, %exit.3.to.2 ] + br i1 %cond, label %latch.2.exit, label %header.2 + +latch.2.exit: + br label %latch.1 + +exit.3.to.2: + br label %latch.2 + +latch.1: + br i1 %cond, label %end, label %header.1 + +end: + ret void +} + + +; CHECK: define void @main(i1 %cond) { +; CHECK: entry: +; CHECK: br label %header.1 + +; CHECK: header.1: +; CHECK: br label %header.2 + +; CHECK: header.2: +; CHECK: br label %header.3 + +; CHECK: header.3: +; CHECK: br label %if.3 + +; CHECK: if.3: +; CHECK: br i1 %cond, label %exiting.3, label %dx.struct_exit.new_exiting + +; CHECK: exiting.3: +; CHECK: %x3val = add i32 0, 0 +; CHECK: br label %dx.struct_exit.new_exiting + +; CHECK: dx.struct_exit.new_exiting: +; CHECK: %dx.struct_exit.prop1 = phi i1 [ %cond, %exiting.3 ], [ false, %if.3 ] +; CHECK: %dx.struct_exit.prop = phi i32 [ %x3val, %exiting.3 ], [ 0, %if.3 ] +; CHECK: br i1 %dx.struct_exit.prop1, label %latch.3.exit, label %endif.3 + +; CHECK: endif.3: +; CHECK: br label %latch.3 + +; CHECK: latch.3: +; CHECK: br i1 %cond, label %latch.3.exit, label %header.3 + +; CHECK: latch.3.exit: +; CHECK: %dx.struct_exit.exit_cond_lcssa = phi i1 [ %dx.struct_exit.prop1, %dx.struct_exit.new_exiting ], [ false, %latch.3 ] +; CHECK: %dx.struct_exit.val_lcssa = phi i32 [ %dx.struct_exit.prop, %dx.struct_exit.new_exiting ], [ 0, %latch.3 ] +; CHECK: br i1 %dx.struct_exit.exit_cond_lcssa, label %exit.3.to.2, label %0 + +; CHECK: