Fix merge conflict in "Better memcpy propagation" (#1233).

This commit is contained in:
Xiang Li 2018-04-17 11:55:25 -07:00
Родитель 5ab102a352 e1fd0fc9de
Коммит afcd7cb6a7
12 изменённых файлов: 466 добавлений и 177 удалений

Просмотреть файл

@ -1282,7 +1282,7 @@ Value *TranslateAtan2(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
Constant *halfPi = ConstantFP::get(Ty->getScalarType(), M_PI / 2);
Constant *negHalfPi = ConstantFP::get(Ty->getScalarType(), -M_PI / 2);
Constant *zero = ConstantFP::get(Ty->getScalarType(), 0);
if (Ty != Ty->getScalarType()) {
if (Ty->isVectorTy()) {
unsigned vecSize = Ty->getVectorNumElements();
pi = ConstantVector::getSplat(vecSize, pi);
halfPi = ConstantVector::getSplat(vecSize, halfPi);

Просмотреть файл

@ -703,9 +703,9 @@ void collectInputOutputAccessInfo(
Value *GV, Constant *constZero,
std::vector<InputOutputAccessInfo> &accessInfoList, bool hasVertexID,
bool bInput, bool bRowMajor) {
auto User = GV->user_begin();
auto UserE = GV->user_end();
for (; User != UserE;) {
// merge GEP use for input output.
HLModule::MergeGepUse(GV);
for (auto User = GV->user_begin(); User != GV->user_end();) {
Value *I = *(User++);
if (LoadInst *ldInst = dyn_cast<LoadInst>(I)) {
if (bInput) {

Просмотреть файл

@ -20,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
@ -58,6 +59,7 @@
#include <deque>
#include <unordered_map>
#include <unordered_set>
#include <queue>
using namespace llvm;
using namespace hlsl;
@ -77,11 +79,13 @@ public:
static bool DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
IRBuilder<> &Builder, bool bFlatVector,
bool hasPrecise, DxilTypeSystem &typeSys,
const DataLayout &DL,
SmallVector<Value *, 32> &DeadInsts);
static bool DoScalarReplacement(GlobalVariable *GV, std::vector<Value *> &Elts,
IRBuilder<> &Builder, bool bFlatVector,
bool hasPrecise, DxilTypeSystem &typeSys,
const DataLayout &DL,
SmallVector<Value *, 32> &DeadInsts);
// Lower memcpy related to V.
static bool LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
@ -92,8 +96,9 @@ public:
static bool IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys);
private:
SROA_Helper(Value *V, ArrayRef<Value *> Elts,
SmallVector<Value *, 32> &DeadInsts)
: OldVal(V), NewElts(Elts), DeadInsts(DeadInsts) {}
SmallVector<Value *, 32> &DeadInsts, DxilTypeSystem &ts,
const DataLayout &dl)
: OldVal(V), NewElts(Elts), DeadInsts(DeadInsts), typeSys(ts), DL(dl) {}
void RewriteForScalarRepl(Value *V, IRBuilder<> &Builder);
private:
@ -102,6 +107,8 @@ private:
// Flattened elements for OldVal.
ArrayRef<Value*> NewElts;
SmallVector<Value *, 32> &DeadInsts;
DxilTypeSystem &typeSys;
const DataLayout &DL;
void RewriteForConstExpr(ConstantExpr *user, IRBuilder<> &Builder);
void RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder);
@ -267,7 +274,8 @@ public:
static void PatchMemCpyWithZeroIdxGEP(MemCpyInst *MI, const DataLayout &DL);
static void SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
DxilFieldAnnotation *fieldAnnotation,
DxilTypeSystem &typeSys);
DxilTypeSystem &typeSys,
const bool bEltMemCpy = true);
};
}
@ -1525,135 +1533,139 @@ bool SROA_HLSL::ShouldAttemptScalarRepl(AllocaInst *AI) {
bool SROA_HLSL::performScalarRepl(Function &F, DxilTypeSystem &typeSys) {
std::vector<AllocaInst *> AllocaList;
const DataLayout &DL = F.getParent()->getDataLayout();
// Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
if (A->hasNUsesOrMore(1))
AllocaList.emplace_back(A);
}
// merge GEP use for the allocs
for (auto A : AllocaList)
HLModule::MergeGepUse(A);
// Make sure big alloca split first.
// This will simplify memcpy check between part of big alloca and small
// alloca. Big alloca will be split to smaller piece first, when process the
// alloca, it will be alloca flattened from big alloca instead of a GEP of big
// alloca.
auto size_cmp = [&DL](const AllocaInst *a0, const AllocaInst *a1) -> bool {
return DL.getTypeAllocSize(a0->getAllocatedType()) >
return DL.getTypeAllocSize(a0->getAllocatedType()) <
DL.getTypeAllocSize(a1->getAllocatedType());
};
std::sort(AllocaList.begin(), AllocaList.end(), size_cmp);
std::priority_queue<AllocaInst *, std::vector<AllocaInst *>,
std::function<bool(AllocaInst *, AllocaInst *)>>
WorkList(size_cmp);
std::unordered_map<AllocaInst*, DbgDeclareInst*> DDIMap;
// Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
if (AllocaInst *A = dyn_cast<AllocaInst>(I)) {
if (!A->user_empty()) {
WorkList.push(A);
// merge GEP use for the allocs
HLModule::MergeGepUse(A);
if (DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(A)) {
DDIMap[A] = DDI;
}
}
}
DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
// Process the worklist
bool Changed = false;
for (AllocaInst *Alloc : AllocaList) {
DbgDeclareInst *DDI = llvm::FindAllocaDbgDeclare(Alloc);
unsigned debugOffset = 0;
std::deque<AllocaInst *> WorkList;
WorkList.emplace_back(Alloc);
while (!WorkList.empty()) {
AllocaInst *AI = WorkList.front();
WorkList.pop_front();
while (!WorkList.empty()) {
AllocaInst *AI = WorkList.top();
WorkList.pop();
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// with unused elements.
if (AI->use_empty()) {
// Handle dead allocas trivially. These can be formed by SROA'ing arrays
// with unused elements.
if (AI->use_empty()) {
AI->eraseFromParent();
Changed = true;
continue;
}
const bool bAllowReplace = true;
if (SROA_Helper::LowerMemcpy(AI, /*annotation*/ nullptr, typeSys, DL,
bAllowReplace)) {
Changed = true;
continue;
}
// If this alloca is impossible for us to promote, reject it early.
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0)
continue;
Type *Ty = AI->getAllocatedType();
// Skip empty struct type.
if (SROA_Helper::IsEmptyStructType(Ty, typeSys)) {
SROA_Helper::MarkEmptyStructUsers(AI, DeadInsts);
DeleteDeadInstructions();
continue;
}
// If the alloca looks like a good candidate for scalar replacement, and
// if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
std::vector<Value *> Elts;
IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
bool SROAed = SROA_Helper::DoScalarReplacement(
AI, Elts, Builder, /*bFlatVector*/ true, hasPrecise, typeSys, DL,
DeadInsts);
if (SROAed) {
Type *Ty = AI->getAllocatedType();
// Skip empty struct parameters.
if (StructType *ST = dyn_cast<StructType>(Ty)) {
if (!HLMatrixLower::IsMatrixType(Ty)) {
DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
if (SA && SA->IsEmptyStruct()) {
for (User *U : AI->users()) {
if (StoreInst *SI = dyn_cast<StoreInst>(U))
DeadInsts.emplace_back(SI);
}
DeleteDeadInstructions();
AI->replaceAllUsesWith(UndefValue::get(AI->getType()));
AI->eraseFromParent();
continue;
}
}
}
DbgDeclareInst *DDI = nullptr;
unsigned debugOffset = 0;
auto iter = DDIMap.find(AI);
if (iter != DDIMap.end()) {
DDI = iter->second;
}
// Push Elts into workList.
for (auto iter = Elts.begin(); iter != Elts.end(); iter++) {
AllocaInst *Elt = cast<AllocaInst>(*iter);
WorkList.push(Elt);
if (DDI) {
Type *Ty = Elt->getAllocatedType();
unsigned size = DL.getTypeAllocSize(Ty);
DIExpression *DDIExp =
DIB.createBitPieceExpression(debugOffset, size);
debugOffset += size;
DbgDeclareInst *EltDDI = cast<DbgDeclareInst>(DIB.insertDeclare(
Elt, DDI->getVariable(), DDIExp, DDI->getDebugLoc(), DDI));
DDIMap[Elt] = EltDDI;
}
}
// Now erase any instructions that were made dead while rewriting the
// alloca.
DeleteDeadInstructions();
++NumReplaced;
AI->eraseFromParent();
Changed = true;
continue;
}
const bool bAllowReplace = true;
if (SROA_Helper::LowerMemcpy(AI, /*annotation*/ nullptr, typeSys, DL,
bAllowReplace)) {
Changed = true;
continue;
}
// If this alloca is impossible for us to promote, reject it early.
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0)
continue;
Type *Ty = AI->getAllocatedType();
// Skip empty struct type.
if (SROA_Helper::IsEmptyStructType(Ty, typeSys)) {
SROA_Helper::MarkEmptyStructUsers(AI, DeadInsts);
DeleteDeadInstructions();
continue;
}
// If the alloca looks like a good candidate for scalar replacement, and
// if
// all its users can be transformed, then split up the aggregate into its
// separate elements.
if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
std::vector<Value *> Elts;
IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(AI));
bool hasPrecise = HLModule::HasPreciseAttributeWithMetadata(AI);
bool SROAed = SROA_Helper::DoScalarReplacement(
AI, Elts, Builder, /*bFlatVector*/ true, hasPrecise, typeSys,
DeadInsts);
if (SROAed) {
Type *Ty = AI->getAllocatedType();
// Skip empty struct parameters.
if (StructType *ST = dyn_cast<StructType>(Ty)) {
if (!HLMatrixLower::IsMatrixType(Ty)) {
DxilStructAnnotation *SA = typeSys.GetStructAnnotation(ST);
if (SA && SA->IsEmptyStruct()) {
for (User *U : AI->users()) {
if (StoreInst *SI = dyn_cast<StoreInst>(U))
DeadInsts.emplace_back(SI);
}
DeleteDeadInstructions();
AI->replaceAllUsesWith(UndefValue::get(AI->getType()));
AI->eraseFromParent();
continue;
}
}
}
// Push Elts into workList.
for (auto iter = Elts.begin(); iter != Elts.end(); iter++)
WorkList.emplace_back(cast<AllocaInst>(*iter));
// Now erase any instructions that were made dead while rewriting the
// alloca.
DeleteDeadInstructions();
++NumReplaced;
AI->eraseFromParent();
Changed = true;
continue;
}
}
// Add debug info.
if (DDI != nullptr && AI != Alloc) {
Type *Ty = AI->getAllocatedType();
unsigned size = DL.getTypeAllocSize(Ty);
DIExpression *DDIExp = DIB.createBitPieceExpression(debugOffset, size);
debugOffset += size;
DIB.insertDeclare(AI, DDI->getVariable(), DDIExp, DDI->getDebugLoc(),
DDI);
}
}
}
@ -2194,18 +2206,61 @@ static void SimpleCopy(Value *Dest, Value *Src,
else
SimpleValCopy(Dest, Src, idxList, Builder);
}
// Build an inbounds GEP of Ptr for idxList. When Ptr is itself a GEP, fold
// the two index lists together so a single flat GEP is emitted instead of a
// GEP-of-GEP chain.
static Value *CreateMergedGEP(Value *Ptr, SmallVector<Value *, 16> &idxList,
                              IRBuilder<> &Builder) {
  GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr);
  if (!GEPPtr)
    return Builder.CreateInBoundsGEP(Ptr, idxList);
  SmallVector<Value *, 2> MergedIdx(GEPPtr->idx_begin(), GEPPtr->idx_end());
  // Skip idxList.begin() because that leading zero index is already covered
  // by the trailing index of GEPPtr.
  MergedIdx.append(idxList.begin() + 1, idxList.end());
  return Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), MergedIdx);
}
// Emit a memcpy for one aggregate element: address both sides with idxList
// (merging into any existing GEP) and copy DL's alloc size of Ty.
static void EltMemCpy(Type *Ty, Value *Dest, Value *Src,
                      SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
                      const DataLayout &DL) {
  const unsigned eltSize = DL.getTypeAllocSize(Ty);
  Value *DstPtr = CreateMergedGEP(Dest, idxList, Builder);
  Value *SrcPtr = CreateMergedGEP(Src, idxList, Builder);
  Builder.CreateMemCpy(DstPtr, SrcPtr, eltSize, eltSize);
}
// Decide whether a copy of Ty should remain a memcpy. Only aggregate types
// that are neither matrices nor HLSL object types qualify; empty annotated
// structs do not, and a one-element struct defers to its element type.
static bool IsMemCpyTy(Type *Ty, DxilTypeSystem &typeSys) {
  if (!Ty->isAggregateType())
    return false;
  if (HLMatrixLower::IsMatrixType(Ty))
    return false;
  if (HLModule::IsHLSLObjectType(Ty))
    return false;
  StructType *ST = dyn_cast<StructType>(Ty);
  if (!ST)
    return true;
  DxilStructAnnotation *STA = typeSys.GetStructAnnotation(ST);
  DXASSERT(STA, "require annotation here");
  if (STA->IsEmptyStruct())
    return false;
  // Skip a one-element struct whose element is a basic type: the generated
  // memcpy would gep into the struct and copy only the basic type, so decide
  // based on the element instead.
  if (ST->getNumElements() == 1)
    return IsMemCpyTy(ST->getElementType(0), typeSys);
  return true;
}
// Split copy into ld/st.
static void SplitCpy(Type *Ty, Value *Dest, Value *Src,
SmallVector<Value *, 16> &idxList, IRBuilder<> &Builder,
DxilTypeSystem &typeSys,
DxilFieldAnnotation *fieldAnnotation) {
const DataLayout &DL, DxilTypeSystem &typeSys,
DxilFieldAnnotation *fieldAnnotation, const bool bEltMemCpy = true) {
if (PointerType *PT = dyn_cast<PointerType>(Ty)) {
Constant *idx = Constant::getIntegerValue(
IntegerType::get(Ty->getContext(), 32), APInt(32, 0));
idxList.emplace_back(idx);
SplitCpy(PT->getElementType(), Dest, Src, idxList, Builder, typeSys,
fieldAnnotation);
SplitCpy(PT->getElementType(), Dest, Src, idxList, Builder, DL, typeSys,
fieldAnnotation, bEltMemCpy);
idxList.pop_back();
} else if (HLMatrixLower::IsMatrixType(Ty)) {
@ -2256,12 +2311,16 @@ static void SplitCpy(Type *Ty, Value *Dest, Value *Src,
return;
for (uint32_t i = 0; i < ST->getNumElements(); i++) {
llvm::Type *ET = ST->getElementType(i);
Constant *idx = llvm::Constant::getIntegerValue(
IntegerType::get(Ty->getContext(), 32), APInt(32, i));
idxList.emplace_back(idx);
DxilFieldAnnotation &EltAnnotation = STA->GetFieldAnnotation(i);
SplitCpy(ET, Dest, Src, idxList, Builder, typeSys, &EltAnnotation);
if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
} else {
DxilFieldAnnotation &EltAnnotation = STA->GetFieldAnnotation(i);
SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, &EltAnnotation,
bEltMemCpy);
}
idxList.pop_back();
}
@ -2273,7 +2332,12 @@ static void SplitCpy(Type *Ty, Value *Dest, Value *Src,
Constant *idx = Constant::getIntegerValue(
IntegerType::get(Ty->getContext(), 32), APInt(32, i));
idxList.emplace_back(idx);
SplitCpy(ET, Dest, Src, idxList, Builder, typeSys, fieldAnnotation);
if (bEltMemCpy && IsMemCpyTy(ET, typeSys)) {
EltMemCpy(ET, Dest, Src, idxList, Builder, DL);
} else {
SplitCpy(ET, Dest, Src, idxList, Builder, DL, typeSys, fieldAnnotation,
bEltMemCpy);
}
idxList.pop_back();
}
@ -2382,8 +2446,16 @@ static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsi
static void PatchZeroIdxGEP(Value *Ptr, Value *RawPtr, MemCpyInst *MI,
unsigned level, IRBuilder<> &Builder) {
Value *zeroIdx = Builder.getInt32(0);
SmallVector<Value *, 2> IdxList(level + 1, zeroIdx);
Value *GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
Value *GEP = nullptr;
if (GEPOperator *GEPPtr = dyn_cast<GEPOperator>(Ptr)) {
SmallVector<Value *, 2> IdxList(GEPPtr->idx_begin(), GEPPtr->idx_end());
// Do not add 1 to level because the leading index is already part of GEPPtr's indices.
IdxList.append(level, zeroIdx);
GEP = Builder.CreateInBoundsGEP(GEPPtr->getPointerOperand(), IdxList);
} else {
SmallVector<Value *, 2> IdxList(level + 1, zeroIdx);
GEP = Builder.CreateInBoundsGEP(Ptr, IdxList);
}
// Use BitCastInst::Create to prevent idxList from being optimized.
CastInst *Cast =
BitCastInst::Create(Instruction::BitCast, GEP, RawPtr->getType());
@ -2471,7 +2543,7 @@ static void DeleteMemcpy(MemCpyInst *MI) {
void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
DxilFieldAnnotation *fieldAnnotation,
DxilTypeSystem &typeSys) {
DxilTypeSystem &typeSys, const bool bEltMemCpy) {
Value *Dest = MI->getRawDest();
Value *Src = MI->getRawSource();
// Only remove one level bitcast generated from inline.
@ -2499,28 +2571,34 @@ void MemcpySplitter::SplitMemCpy(MemCpyInst *MI, const DataLayout &DL,
// split
// Matrix is treated as scalar type, will not use memcpy.
// So use nullptr for fieldAnnotation should be safe here.
SplitCpy(Dest->getType(), Dest, Src, idxList, Builder, typeSys,
fieldAnnotation);
SplitCpy(Dest->getType(), Dest, Src, idxList, Builder, DL, typeSys,
fieldAnnotation, bEltMemCpy);
// delete memcpy
DeleteMemcpy(MI);
}
void MemcpySplitter::Split(llvm::Function &F) {
const DataLayout &DL = F.getParent()->getDataLayout();
// Walk all instruction in the function.
for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
if (MemCpyInst *MI = dyn_cast<MemCpyInst>(I)) {
// Matrix is treated as scalar type, will not use memcpy.
// So use nullptr for fieldAnnotation should be safe here.
SplitMemCpy(MI, DL, /*fieldAnnotation*/ nullptr, m_typeSys);
}
Function *memcpy = nullptr;
for (Function &Fn : F.getParent()->functions()) {
if (Fn.getIntrinsicID() == Intrinsic::memcpy) {
memcpy = &Fn;
break;
}
}
}
if (memcpy) {
for (auto U = memcpy->user_begin(); U != memcpy->user_end();) {
MemCpyInst *MI = cast<MemCpyInst>(*(U++));
if (MI->getParent()->getParent() != &F)
continue;
// Matrix is treated as scalar type, will not use memcpy.
// So use nullptr for fieldAnnotation should be safe here.
SplitMemCpy(MI, DL, /*fieldAnnotation*/ nullptr, m_typeSys,
/*bEltMemCpy*/ false);
}
}
}
//===----------------------------------------------------------------------===//
// SRoA Helper
@ -2593,7 +2671,14 @@ void SROA_Helper::RewriteForGEP(GEPOperator *GEP, IRBuilder<> &Builder) {
Value *NewGEP = Builder.CreateGEP(nullptr, NewElts[i], NewArgs);
NewGEPs.emplace_back(NewGEP);
}
SROA_Helper helper(GEP, NewGEPs, DeadInsts);
const bool bAllowReplace = isa<AllocaInst>(OldVal);
if (SROA_Helper::LowerMemcpy(GEP, /*annoation*/ nullptr, typeSys, DL,
bAllowReplace)) {
if (GEP->user_empty() && isa<Instruction>(GEP))
DeadInsts.push_back(GEP);
return;
}
SROA_Helper helper(GEP, NewGEPs, DeadInsts, typeSys, DL);
helper.RewriteForScalarRepl(GEP, Builder);
for (Value *NewGEP : NewGEPs) {
if (NewGEP->user_empty() && isa<Instruction>(NewGEP)) {
@ -3171,7 +3256,7 @@ void SROA_Helper::RewriteForAddrSpaceCast(ConstantExpr *CE,
CE->getType()->getPointerAddressSpace()));
NewCasts.emplace_back(NewGEP);
}
SROA_Helper helper(CE, NewCasts, DeadInsts);
SROA_Helper helper(CE, NewCasts, DeadInsts, typeSys, DL);
helper.RewriteForScalarRepl(CE, Builder);
}
@ -3255,6 +3340,7 @@ static ArrayType *CreateNestArrayTy(Type *FinalEltTy,
bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
IRBuilder<> &Builder, bool bFlatVector,
bool hasPrecise, DxilTypeSystem &typeSys,
const DataLayout &DL,
SmallVector<Value *, 32> &DeadInsts) {
DEBUG(dbgs() << "Found inst to SROA: " << *V << '\n');
Type *Ty = V->getType();
@ -3377,7 +3463,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
// Now that we have created the new alloca instructions, rewrite all the
// uses of the old alloca.
SROA_Helper helper(V, Elts, DeadInsts);
SROA_Helper helper(V, Elts, DeadInsts, typeSys, DL);
helper.RewriteForScalarRepl(V, Builder);
return true;
@ -3423,9 +3509,11 @@ static Constant *GetEltInit(Type *Ty, Constant *Init, unsigned idx,
/// DoScalarReplacement - Split V into AllocaInsts with Builder and save the new AllocaInsts into Elts.
/// Then do SROA on V.
bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, std::vector<Value *> &Elts,
bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
std::vector<Value *> &Elts,
IRBuilder<> &Builder, bool bFlatVector,
bool hasPrecise, DxilTypeSystem &typeSys,
const DataLayout &DL,
SmallVector<Value *, 32> &DeadInsts) {
DEBUG(dbgs() << "Found inst to SROA: " << *GV << '\n');
Type *Ty = GV->getType();
@ -3565,7 +3653,7 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV, std::vector<Value *> &
// Now that we have created the new alloca instructions, rewrite all the
// uses of the old alloca.
SROA_Helper helper(GV, Elts, DeadInsts);
SROA_Helper helper(GV, Elts, DeadInsts, typeSys, DL);
helper.RewriteForScalarRepl(GV, Builder);
return true;
@ -3644,12 +3732,6 @@ struct PointerStatus {
void PointerStatus::analyzePointer(const Value *V, PointerStatus &PS,
DxilTypeSystem &typeSys, bool bStructElt) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
PS.StoredType = PointerStatus::StoredType::InitializerStored;
}
}
for (const User *U : V->users()) {
if (const Instruction *I = dyn_cast<Instruction>(U)) {
const Function *F = I->getParent()->getParent();
@ -3858,6 +3940,90 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC) {
}
}
// Rewrite loads of zero-initialized V that execute before instruction I
// (the single defining store/memcpy), for the case where I's block is the
// entry block. Recurses through GEP/bitcast users of V. Returns false when a
// use is found that cannot be proven to see the zero initializer.
static bool ReplaceUseOfZeroInitEntry(Instruction *I, Value *V) {
  BasicBlock *BB = I->getParent();
  Function *F = I->getParent()->getParent();
  // Post-increment the user iterator: loads may be erased during the walk.
  for (auto U = V->user_begin(); U != V->user_end(); ) {
    Instruction *UI = dyn_cast<Instruction>(*(U++));
    if (!UI)
      continue;
    // Ignore uses from other functions (V may be a global).
    if (UI->getParent()->getParent() != F)
      continue;
    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      // Look through the address computation at its own users.
      if (!ReplaceUseOfZeroInitEntry(I, UI))
        return false;
      else
        continue;
    }
    // Only uses in I's own block (and not I itself) are candidates here.
    if (BB != UI->getParent() || UI == I)
      continue;
    // I is the last inst in the block after split.
    // Any inst in current block is before I.
    if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      // The load happens before the def, so it reads the zero initializer.
      LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
      LI->eraseFromParent();
      continue;
    }
    // Some other use before the def: propagation is not provably safe.
    return false;
  }
  return true;
}
// Same as ReplaceUseOfZeroInitEntry, but for a def I that is not in the
// entry block: additionally require I's block to post-dominate every user's
// block, so any use not rewritten is guaranteed to execute after the def.
// Returns false when that guarantee cannot be established.
static bool ReplaceUseOfZeroInitPostDom(Instruction *I, Value *V,
                                        PostDominatorTree &PDT) {
  BasicBlock *BB = I->getParent();
  Function *F = I->getParent()->getParent();
  // Post-increment the user iterator: loads may be erased during the walk.
  for (auto U = V->user_begin(); U != V->user_end(); ) {
    Instruction *UI = dyn_cast<Instruction>(*(U++));
    if (!UI)
      continue;
    // Ignore uses from other functions (V may be a global).
    if (UI->getParent()->getParent() != F)
      continue;
    // A use in a block not post-dominated by BB might run before the def
    // without being rewritten below — bail out.
    if (!PDT.dominates(BB, UI->getParent()))
      return false;
    if (isa<GetElementPtrInst>(UI) || isa<BitCastInst>(UI)) {
      // Look through the address computation at its own users.
      if (!ReplaceUseOfZeroInitPostDom(I, UI, PDT))
        return false;
      else
        continue;
    }
    // Only uses in I's own block (and not I itself) are candidates here.
    if (BB != UI->getParent() || UI == I)
      continue;
    // I is the last inst in the block after split.
    // Any inst in current block is before I.
    if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      // The load happens before the def, so it reads the zero initializer.
      LI->replaceAllUsesWith(ConstantAggregateZero::get(LI->getType()));
      LI->eraseFromParent();
      continue;
    }
    // Some other use before the def: propagation is not provably safe.
    return false;
  }
  return true;
}
// When zero initialized GV has only one define, all uses before the def should
// use zero.
// Entry point: I is the single memcpy that defines zero-initialized GV.
// Splits I's block so I becomes the last instruction, then rewrites earlier
// loads of GV to zero. Returns false when some use cannot be proven to run
// before the def, in which case the caller must treat GV as generally stored.
static bool ReplaceUseOfZeroInitBeforeDef(Instruction *I, GlobalVariable *GV) {
  BasicBlock *BB = I->getParent();
  Function *F = I->getParent()->getParent();
  // Make sure I is the last inst for BB.
  if (I != BB->getTerminator())
    BB->splitBasicBlock(I->getNextNode());
  if (&F->getEntryBlock() == I->getParent()) {
    // Entry block: everything in it runs exactly once, before all other blocks.
    return ReplaceUseOfZeroInitEntry(I, GV);
  } else {
    // Post dominator tree.
    // Needed to prove other blocks' uses execute after the def.
    PostDominatorTree PDT;
    PDT.runOnFunction(*F);
    return ReplaceUseOfZeroInitPostDom(I, GV, PDT);
  }
}
bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
DxilTypeSystem &typeSys, const DataLayout &DL,
bool bAllowReplace) {
@ -3872,6 +4038,32 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
PointerStatus PS(size);
const bool bStructElt = false;
PointerStatus::analyzePointer(V, PS, typeSys, bStructElt);
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
if (PS.StoredType == PointerStatus::StoredType::NotStored) {
PS.StoredType = PointerStatus::StoredType::InitializerStored;
} else if (PS.StoredType == PointerStatus::StoredType::MemcopyDestOnce) {
// For a single memcpy store: if the store does not dominate all users,
// mark it as Stored.
// Case like:
// struct A { float4 x[25]; };
// A a;
// static A a2;
// void set(A aa) { aa = a; }
// call set inside entry function then use a2.
if (isa<ConstantAggregateZero>(GV->getInitializer())) {
Instruction * Memcpy = PS.StoringMemcpy;
if (!ReplaceUseOfZeroInitBeforeDef(Memcpy, GV)) {
PS.StoredType = PointerStatus::StoredType::Stored;
}
}
} else {
PS.StoredType = PointerStatus::StoredType::Stored;
}
}
}
if (bAllowReplace && !PS.HasMultipleAccessingFunctions) {
if (PS.StoredType == PointerStatus::StoredType::MemcopyDestOnce &&
// Skip argument for input argument has input value, it is not dest once anymore.
@ -3979,8 +4171,9 @@ bool SROA_Helper::IsEmptyStructType(Type *Ty, DxilTypeSystem &typeSys) {
//===----------------------------------------------------------------------===//
static void LegalizeDxilInputOutputs(Function *F,
DxilFunctionAnnotation *EntryAnnotation,
DxilTypeSystem &typeSys);
DxilFunctionAnnotation *EntryAnnotation,
const DataLayout &DL,
DxilTypeSystem &typeSys);
namespace {
class SROA_Parameter_HLSL : public ModulePass {
@ -3997,7 +4190,7 @@ public:
MemcpySplitter::PatchMemCpyWithZeroIdxGEP(M);
m_pHLModule = &M.GetOrCreateHLModule();
const DataLayout &DL = M.getDataLayout();
// Load up debug information, to cross-reference values and the instructions
// used to load them.
m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
@ -4030,7 +4223,8 @@ public:
if (&F != m_pHLModule->GetEntryFunction() &&
!m_pHLModule->IsEntryThatUsesSignatures(&F)) {
if (!F.isDeclaration())
LegalizeDxilInputOutputs(&F, m_pHLModule->GetFunctionAnnotation(&F), m_pHLModule->GetTypeSystem());
LegalizeDxilInputOutputs(&F, m_pHLModule->GetFunctionAnnotation(&F),
DL, m_pHLModule->GetTypeSystem());
continue;
}
@ -4279,8 +4473,7 @@ void SROA_Parameter_HLSL::flattenGlobal(GlobalVariable *GV) {
bool SROAed = SROA_Helper::DoScalarReplacement(
EltGV, Elts, Builder, bFlatVector,
// TODO: set precise.
/*hasPrecise*/ false,
dxilTypeSys, DeadInsts);
/*hasPrecise*/ false, dxilTypeSys, DL, DeadInsts);
if (SROAed) {
// Push Elts into workList.
@ -5090,7 +5283,7 @@ void SROA_Parameter_HLSL::flattenArgument(
// Not flat vector for entry function currently.
bool SROAed = SROA_Helper::DoScalarReplacement(
V, Elts, Builder, /*bFlatVector*/ false, annotation.IsPrecise(),
dxilTypeSys, DeadInsts);
dxilTypeSys, DL, DeadInsts);
if (SROAed) {
Type *Ty = V->getType()->getPointerElementType();
@ -5292,7 +5485,7 @@ void SROA_Parameter_HLSL::flattenArgument(
IRBuilder<> Builder(CI);
llvm::SmallVector<llvm::Value *, 16> idxList;
SplitCpy(data->getType(), outputVal, data, idxList, Builder,
SplitCpy(data->getType(), outputVal, data, idxList, Builder, DL,
dxilTypeSys, &flatParamAnnotation);
CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
@ -5319,7 +5512,7 @@ void SROA_Parameter_HLSL::flattenArgument(
llvm::SmallVector<llvm::Value *, 16> idxList;
SplitCpy(DataPtr->getType(), EltPtr, DataPtr, idxList,
Builder, dxilTypeSys, &flatParamAnnotation);
Builder, DL, dxilTypeSys, &flatParamAnnotation);
CI->setArgOperand(i, EltPtr);
}
}
@ -5477,7 +5670,8 @@ void SROA_Parameter_HLSL::moveFunctionBody(Function *F, Function *flatF) {
}
}
static void SplitArrayCopy(Value *V, DxilTypeSystem &typeSys,
static void SplitArrayCopy(Value *V, const DataLayout &DL,
DxilTypeSystem &typeSys,
DxilFieldAnnotation *fieldAnnotation) {
for (auto U = V->user_begin(); U != V->user_end();) {
User *user = *(U++);
@ -5486,7 +5680,7 @@ static void SplitArrayCopy(Value *V, DxilTypeSystem &typeSys,
Value *val = ST->getValueOperand();
IRBuilder<> Builder(ST);
SmallVector<Value *, 16> idxList;
SplitCpy(ptr->getType(), ptr, val, idxList, Builder, typeSys,
SplitCpy(ptr->getType(), ptr, val, idxList, Builder, DL, typeSys,
fieldAnnotation);
ST->eraseFromParent();
}
@ -5529,6 +5723,7 @@ static void CheckArgUsage(Value *V, bool &bLoad, bool &bStore) {
// Support store to input and load from output.
static void LegalizeDxilInputOutputs(Function *F,
DxilFunctionAnnotation *EntryAnnotation,
const DataLayout &DL,
DxilTypeSystem &typeSys) {
BasicBlock &EntryBlk = F->getEntryBlock();
Module *M = F->getParent();
@ -5626,7 +5821,7 @@ static void LegalizeDxilInputOutputs(Function *F,
if (bStoreInputToTemp) {
llvm::SmallVector<llvm::Value *, 16> idxList;
// split copy.
SplitCpy(temp->getType(), temp, &arg, idxList, Builder, typeSys,
SplitCpy(temp->getType(), temp, &arg, idxList, Builder, DL, typeSys,
&paramAnnotation);
}
@ -5656,7 +5851,7 @@ static void LegalizeDxilInputOutputs(Function *F,
else
onlyRetBlk = true;
// split copy.
SplitCpy(output->getType(), output, temp, idxList, Builder, typeSys,
SplitCpy(output->getType(), output, temp, idxList, Builder, DL, typeSys,
&paramAnnotation);
}
// Clone the return.
@ -5674,6 +5869,8 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
"otherwise, createFlattenedFunction called on library function "
"that should not be flattened.");
const DataLayout &DL = m_pHLModule->GetModule()->getDataLayout();
// Skip void (void) function.
if (F->getReturnType()->isVoidTy() && F->getArgumentList().empty()) {
return;
@ -5859,7 +6056,7 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
}
if (!F->isDeclaration()) {
// Support store to input and load from output.
LegalizeDxilInputOutputs(F, funcAnnotation, typeSys);
LegalizeDxilInputOutputs(F, funcAnnotation, DL, typeSys);
}
return;
}
@ -6001,12 +6198,12 @@ void SROA_Parameter_HLSL::createFlattenedFunction(Function *F) {
Type *Ty = Arg->getType()->getPointerElementType();
if (Ty->isArrayTy())
SplitArrayCopy(
Arg, typeSys,
Arg, DL, typeSys,
&flatFuncAnnotation->GetParameterAnnotation(Arg->getArgNo()));
}
}
// Support store to input and load from output.
LegalizeDxilInputOutputs(flatF, flatFuncAnnotation, typeSys);
LegalizeDxilInputOutputs(flatF, flatFuncAnnotation, DL, typeSys);
}
}
@ -6780,6 +6977,8 @@ void ResourceToHandle::ReplaceResourceWithHandle(Value *ResPtr,
Builder.CreateStore(Handle, HandlePtr);
// Remove resource Store.
SI->eraseFromParent();
} else if (U->user_empty() && isa<GEPOperator>(U)) {
continue;
} else {
CallInst *CI = cast<CallInst>(U);
IRBuilder<> Builder(CI);

Просмотреть файл

@ -3086,6 +3086,9 @@ static bool CreateCBufferVariable(HLCBuffer &CB,
if (cbSubscript->user_empty()) {
cbSubscript->eraseFromParent();
Handle->eraseFromParent();
} else {
// merge GEP use for cbSubscript.
HLModule::MergeGepUse(cbSubscript);
}
}
return true;
@ -4468,7 +4471,7 @@ void CGMSHLSLRuntime::FinishCodeGen() {
if (f.hasFnAttribute(llvm::Attribute::NoInline))
continue;
// Always inline for used functions.
if (!f.user_empty())
if (!f.user_empty() && !f.isDeclaration())
f.addFnAttr(llvm::Attribute::AlwaysInline);
}

Просмотреть файл

@ -646,6 +646,12 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
uint32_t index = 0;
for (const auto *decl : typeTranslator.collectDeclsInDeclContext(context))
if (const auto *varDecl = dyn_cast<VarDecl>(decl)) {
if (const auto *init = varDecl->getInit()) {
emitWarning(
"variable '%0' will be placed in $Globals so initializer ignored",
init->getExprLoc())
<< var->getName() << init->getSourceRange();
}
if (const auto *attr = varDecl->getAttr<VKBindingAttr>()) {
emitError("variable '%0' will be placed in $Globals so cannot have "
"vk::binding attribute",

Просмотреть файл

@ -1192,7 +1192,7 @@ void SPIRVEmitter::doHLSLBufferDecl(const HLSLBufferDecl *bufferDecl) {
if (const auto *varMember = dyn_cast<VarDecl>(member)) {
if (const auto *init = varMember->getInit())
emitWarning("%select{tbuffer|cbuffer}0 member initializer "
"ignored since no equivalent in Vulkan",
"ignored since no Vulkan equivalent",
init->getExprLoc())
<< bufferDecl->isCBuffer() << init->getSourceRange();

Просмотреть файл

@ -0,0 +1,23 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Make sure initialization of a static global inside a user function can still be propagated.
// CHECK-NOT: alloca
struct A {
float4 x[25];
};
A a;
static A a2;
void set(A aa) {
aa = a;
}
float4 main(uint l:L) : SV_Target {
set(a2);
return a2.x[l];
}

Просмотреть файл

@ -0,0 +1,30 @@
// RUN: %dxc -E main -T ps_6_0 -Zi %s | FileCheck %s
// Make sure debug info works for flattened alloca.
// CHECK:call void @llvm.dbg.declare(metadata [2 x float]* %a2.1,
struct X {
float a;
int b;
};
struct A {
X x[25];
float y[2];
};
A a;
float b;
void set(A aa) {
aa = a;
aa.y[0] = b;
aa.y[1] = 3;
}
float4 main(uint l:L) : SV_Target {
A a2;
set(a2);
return a2.x[l].a + a2.y[l];
}

Просмотреть файл

@ -0,0 +1,28 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Make sure initialization of a static global inside a user function can still be propagated.
// CHECK-NOT: alloca
// Make sure cbuffer is used.
// CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoad
// Make sure use of zero initializer get zero.
// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
struct A {
float4 x[25];
};
A a;
static A a2;
void set(A aa) {
aa = a;
}
float2 main(uint l:L) : SV_Target {
float m = a2.x[l].x;
set(a2);
return float2(a2.x[l].x,m);
}

Просмотреть файл

@ -1,7 +1,8 @@
// Run: %dxc -T vs_6_0 -E main
[[vk::binding(10, 2)]] float4 gVec;
[[vk::binding(10, 2)]] float4 gVec = 1.0;
float4 main() : A { return gVec; }
// CHECK: :3:38: warning: variable 'gVec' will be placed in $Globals so initializer ignored
// CHECK: :3:3: error: variable 'gVec' will be placed in $Globals so cannot have vk::binding attribute

Просмотреть файл

@ -9,5 +9,5 @@ float main() : A {
return 1.0;
}
// CHECK: :4:15: warning: cbuffer member initializer ignored since no equivalent in Vulkan
// CHECK: :5:16: warning: cbuffer member initializer ignored since no equivalent in Vulkan
// CHECK: :4:15: warning: cbuffer member initializer ignored since no Vulkan equivalent
// CHECK: :5:16: warning: cbuffer member initializer ignored since no Vulkan equivalent

Просмотреть файл

@ -9,6 +9,5 @@ float main() : A {
return 1.0;
}
// CHECK: :4:15: warning: tbuffer member initializer ignored since no equivalent in Vulkan
// CHECK: :5:16: warning: tbuffer member initializer ignored since no equivalent in Vulkan
// CHECK: :4:15: warning: tbuffer member initializer ignored since no Vulkan equivalent
// CHECK: :5:16: warning: tbuffer member initializer ignored since no Vulkan equivalent