Skip copy-in/copy-out for constant global variables. (#2836)

* Skip copy-in/copy-out for constant global variables.

* Enable copy for noinline.
TODO: analyze whether a global variable aliases a parameter.

* Use SetVector and skip resource when copy.

* Disable mayAliasWithGlobal because the optimizations already cover this case: they do not replace the copy when there is an alias.
When a constant global replaces a normal value that has stores, mark the global as non-constant.
This commit is contained in:
Xiang Li 2020-04-23 19:08:03 -07:00 коммит произвёл GitHub
Родитель 1053ca45ab
Коммит 49310e2b2c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 756 добавлений и 242 удалений

Просмотреть файл

@ -115,6 +115,10 @@ void initializeDxilValidateWaveSensitivityPass(llvm::PassRegistry&);
FunctionPass *createCleanupDxBreakPass();
void initializeCleanupDxBreakPass(llvm::PassRegistry&);
ModulePass *createHLLegalizeParameter();
void initializeHLLegalizeParameterPass(llvm::PassRegistry &);
bool AreDxilResourcesDense(llvm::Module *M, hlsl::DxilResourceBase **ppNonDense);
}

94
include/dxc/HLSL/HLUtil.h Normal file
Просмотреть файл

@ -0,0 +1,94 @@
///////////////////////////////////////////////////////////////////////////////
// //
// HLUtil.h //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// HL helper functions. //
// //
///////////////////////////////////////////////////////////////////////////////
#pragma once
#include "llvm/ADT/SetVector.h"
namespace llvm {
class Function;
class Value;
class MemCpyInst;
} // namespace llvm
namespace hlsl {
class DxilTypeSystem;
namespace hlutil {
/// Summary of how a pointer is written and read by its users. The fields are
/// filled in by analyze(), which walks the users of Ptr.
struct PointerStatus {
/// Keep track of what stores to the pointer look like.
enum class StoredType {
/// There is no store to this pointer. It can thus be marked constant.
NotStored,
/// This ptr is a global, and is stored to, but the only thing stored is the
/// constant it was initialized with. This is only tracked for scalar globals.
/// HasStored() treats this state the same as NotStored.
InitializerStored,
/// This ptr is stored to, but only its initializer and one other value
/// is ever stored to it. If this global isStoredOnce, we track the value
/// stored to it in StoredOnceValue below. This is only tracked for scalar
/// globals.
StoredOnce,
/// This ptr is only assigned by a single full-size memcpy (see
/// StoringMemcpy).
MemcopyDestOnce,
/// This ptr is stored to by multiple values or something else that we
/// cannot track.
Stored
} storedType;
/// Keep track of what loads from the pointer look like.
enum class LoadedType {
/// There is no load from this pointer.
NotLoaded,
/// This ptr is only read as the source of a single full-size memcpy (see
/// LoadingMemcpy).
MemcopySrcOnce,
/// This ptr is loaded from by multiple instructions or something else that
/// we cannot track.
Loaded
} loadedType;
/// If only one value (besides the initializer constant) is ever stored to
/// this global, keep track of what value it is. Reset to null by
/// MarkAsStored().
llvm::Value *StoredOnceValue;
/// Memcpy instructions that use this ptr. Only memcpys reached while not
/// inside a struct-element GEP are collected.
llvm::SetVector<llvm::MemCpyInst *> memcpySet;
/// Memcpy which uses this ptr as dest (valid for MemcopyDestOnce).
llvm::MemCpyInst *StoringMemcpy;
/// Memcpy which uses this ptr as src (valid for MemcopySrcOnce).
llvm::MemCpyInst *LoadingMemcpy;
/// These start out null/false. When the first accessing function is noticed,
/// it is recorded. When a second different accessing function is noticed,
/// HasMultipleAccessingFunctions is set to true.
const llvm::Function *AccessingFunction;
bool HasMultipleAccessingFunctions;
/// Size of the ptr, compared against constant memcpy lengths to detect a
/// full copy. A value of 0 makes every constant-length memcpy count as a
/// full copy.
unsigned Size;
/// The pointer whose users are analyzed.
llvm::Value *Ptr;
/// When true, only whether the pointer is loaded/stored matters, so the
/// analysis stops descending once both have been observed.
bool bLoadStoreOnly;
/// Walk the users of Ptr and update the fields above. bStructElt tells the
/// analysis whether Ptr is itself a struct element.
void analyze(DxilTypeSystem &typeSys, bool bStructElt);
/// Start with "not stored / not loaded" for the given pointer.
PointerStatus(llvm::Value *ptr, unsigned size, bool bLdStOnly);
/// Force the status to Stored and forget StoredOnceValue.
void MarkAsStored();
/// Force the status to Loaded.
void MarkAsLoaded();
/// True unless storedType is NotStored or InitializerStored.
bool HasStored();
/// True unless loadedType is NotLoaded.
bool HasLoaded();
};
} // namespace hlutil
} // namespace hlsl

Просмотреть файл

@ -35,6 +35,7 @@ add_llvm_library(LLVMHLSL
DxcOptimizer.cpp
HLDeadFunctionElimination.cpp
HLExpandStoreIntrinsics.cpp
HLLegalizeParameter.cpp
HLLowerUDT.cpp
HLMatrixBitcastLowerPass.cpp
HLMatrixLowerPass.cpp
@ -48,6 +49,7 @@ add_llvm_library(LLVMHLSL
HLPreprocess.cpp
HLResource.cpp
HLSignatureLower.cpp
HLUtil.cpp
PauseResumePasses.cpp
WaveSensitivityAnalysis.cpp

Просмотреть файл

@ -130,6 +130,7 @@ HRESULT SetupRegistryPassForHLSL() {
initializeHLEmitMetadataPass(Registry);
initializeHLEnsureMetadataPass(Registry);
initializeHLExpandStoreIntrinsicsPass(Registry);
initializeHLLegalizeParameterPass(Registry);
initializeHLMatrixLowerPassPass(Registry);
initializeHLPreprocessPass(Registry);
initializeHoistConstantArrayPass(Registry);

Просмотреть файл

@ -0,0 +1,312 @@
///////////////////////////////////////////////////////////////////////////////
// //
// HLLegalizeParameter.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Legalize 'in' parameters that are written to and 'out' parameters that //
// are read from. Must be run before the inline pass. //
///////////////////////////////////////////////////////////////////////////////
#include "dxc/HLSL/HLModule.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/HLSL/DxilGenerationPass.h"
#include "dxc/HLSL/HLUtil.h"
#include "dxc/DXIL/DxilTypeSystem.h"
#include "llvm/IR/IntrinsicInst.h"
#include "dxc/Support/Global.h"
#include "llvm/Pass.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include <vector>
using namespace llvm;
using namespace hlsl;
// For each parameter that needs legalizing, create an alloca to replace all of its uses, and copy between the alloca and the parameter.
namespace {
// Module pass that rewrites pointer parameters so that 'in' parameters are
// never written through and 'out' parameters are never read through, and that
// adds copy-in/copy-out temporaries at call sites where needed.
class HLLegalizeParameter : public ModulePass {
public:
// Pass identification member; handed to the ModulePass constructor below.
static char ID;
explicit HLLegalizeParameter() : ModulePass(ID) {}
// Entry point: patches the parameters of every annotated function definition,
// then runs the call-site copy-in/copy-out step.
bool runOnModule(Module &M) override;
private:
// Redirect all uses of an 'in' pointer argument to a local alloca that is
// initialized from the argument.
void patchWriteOnInParam(Function &F, Argument &Arg, const DataLayout &DL);
// Redirect all uses of an 'out' pointer argument to a local alloca that is
// copied back to the argument before each return.
void patchReadOnOutParam(Function &F, Argument &Arg, const DataLayout &DL);
};
// Create an alloca of type Ty at the first insertion point of F's entry block
// and return it.
AllocaInst *createAllocaForPatch(Function &F, Type *Ty) {
  BasicBlock &EntryBB = F.getEntryBlock();
  IRBuilder<> AllocaBuilder(EntryBB.getFirstInsertionPt());
  AllocaInst *NewAlloca = AllocaBuilder.CreateAlloca(Ty);
  return NewAlloca;
}
// Emit a memcpy of `size` bytes from arg into temp immediately before CI.
// Nothing is emitted for a zero-sized copy.
void copyIn(AllocaInst *temp, Value *arg, CallInst *CI, unsigned size) {
  if (size != 0) {
    // Builder positioned at the call, so the copy lands just before it.
    IRBuilder<> BeforeCall(CI);
    BeforeCall.CreateMemCpy(temp, arg, size, 1);
  }
}
// Emit a memcpy of `size` bytes from temp back into arg immediately after CI.
// Nothing is emitted for a zero-sized copy.
void copyOut(AllocaInst *temp, Value *arg, CallInst *CI, unsigned size) {
  if (size != 0) {
    // Builder positioned at the instruction following the call.
    IRBuilder<> AfterCall(CI->getNextNode());
    AfterCall.CreateMemCpy(arg, temp, size, 1);
  }
}
// Returns true when V is (a GEP chain on top of) an HL subscript call whose
// object operand is a resource handle, or which is a cbuffer subscript.
// Subscripts of subscripts are followed through their object operand.
// Covers CBuffer, Buffer, Texture... pointers, i.e. anything that will later
// be lowered to a dxil op.
bool isPointerNeedToLower(Value *V, Type *HandleTy) {
  for (;;) {
    // Walk up to the root of any GEP chain.
    while (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      V = GEP->getPointerOperand();

    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call)
      return false;

    // Only hl.subscript calls are of interest.
    if (GetHLOpcodeGroup(Call->getCalledFunction()) !=
        HLOpcodeGroup::HLSubscript)
      return false;

    Value *Object = Call->getArgOperand(HLOperandIndex::kSubscriptObjectOpIdx);
    // Pointer derived from a resource handle.
    if (Object->getType() == HandleTy)
      return true;

    // Pointer derived from a cbuffer.
    if (GetHLOpcode(Call) == (unsigned)HLSubscriptOpcode::CBufferSubscript)
      return true;

    // Otherwise keep looking through the object the subscript was applied to.
    V = Object;
  }
}
// Placeholder for an alias check between a call argument and the non-constant
// static globals. It currently always answers "no alias"; none of the
// parameters are inspected.
bool mayAliasWithGlobal(Value *V, CallInst *CallSite, std::vector<GlobalVariable *> &staticGVs) {
// The unsafe case that needs copy-in/copy-out is a global variable aliasing
// a parameter: when the global is updated inside the function, the parameter
// is silently updated as well.
// Currently a copy is added for every non-const static global in
// CGMSHLSLRuntime::EmitHLSLOutParamConversionInit, so this function just
// returns false and does nothing.
//
// Example:
//   struct T {
//     float4 a[10];
//   };
//   static T g;
//   void foo(inout T t) {
//     // modify g
//   }
//   void bar() {
//     T t = g;
//     // No copy is made because t is local,
//     // but optimizations will change t to g later.
//     foo(t);
//   }
// Optimizations which remove the copy should not turn foo(t) into foo(g)
// when g could be modified.
//
// TODO: remove the copy for globals in
// CGMSHLSLRuntime::EmitHLSLOutParamConversionInit and analyze aliasing here,
// so that a copy is only generated when there is an alias.
return false;
}
// One pending copy-in/copy-out patch for a single pointer operand of a call.
struct CopyData {
  CallInst *CallSite; // Call whose operand will be replaced by a temporary.
  Value *Arg;         // The original pointer operand.
  unsigned ArgNo;     // Operand index of Arg at CallSite.
  bool bCopyIn;       // Copy Arg into the temporary before the call.
  bool bCopyOut;      // Copy the temporary back into Arg after the call.
};

// For every call to an annotated function, decide per pointer argument
// whether it has to be passed through a temporary alloca, and if so insert
// the alloca plus the copy-in / copy-out memcpys around the call.
//
// A copy is requested when mayAliasWithGlobal() says so, or when the callee
// is noinline / a declaration and the argument is a pointer that still has to
// be lowered (resource or cbuffer subscript).
void ParameterCopyInCopyOut(hlsl::HLModule &HLM) {
  Module &M = *HLM.GetModule();
  Type *HandleTy = HLM.GetOP()->GetHandleType();
  const DataLayout &DL = M.getDataLayout();

  // Non-constant static globals, handed to the alias query.
  std::vector<GlobalVariable *> staticGVs;
  for (GlobalVariable &GV : M.globals()) {
    if (dxilutil::IsStaticGlobal(&GV) && !GV.isConstant()) {
      staticGVs.emplace_back(&GV);
    }
  }

  // Collect first, patch later: patching rewrites call operands, so it is
  // kept out of the user-list walk below.
  SmallVector<CopyData, 4> WorkList;
  for (Function &F : M) {
    if (F.user_empty())
      continue;
    DxilFunctionAnnotation *Annot = HLM.GetFunctionAnnotation(&F);
    // Skip functions that don't have an annotation; this includes llvm
    // intrinsics and HLOp functions.
    if (!Annot)
      continue;

    bool bNoInline =
        F.hasFnAttribute(llvm::Attribute::NoInline) || F.isDeclaration();

    for (User *U : F.users()) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI)
        continue;
      for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
        Value *arg = CI->getArgOperand(i);
        if (!arg->getType()->isPointerTy())
          continue;

        DxilParameterAnnotation &ParamAnnot = Annot->GetParameterAnnotation(i);
        bool bCopyIn = false;
        bool bCopyOut = false;
        switch (ParamAnnot.GetParamInputQual()) {
        default:
          break;
        case DxilParamInputQual::In: {
          bCopyIn = true;
        } break;
        case DxilParamInputQual::Out: {
          bCopyOut = true;
        } break;
        case DxilParamInputQual::Inout: {
          bCopyIn = true;
          bCopyOut = true;
        } break;
        }

        if (!bCopyIn && !bCopyOut)
          continue;

        // When a ptr from cbuffer/buffer is used, a copy is needed so the
        // lowering does not have to happen inside the user function.
        bool bNeedCopy = mayAliasWithGlobal(arg, CI, staticGVs);
        if (bNoInline)
          bNeedCopy |= isPointerNeedToLower(arg, HandleTy);

        if (!bNeedCopy)
          continue;

        CopyData data = {CI, arg, i, bCopyIn, bCopyOut};
        WorkList.emplace_back(data);
      }
    }
  }

  for (CopyData &data : WorkList) {
    CallInst *CI = data.CallSite;
    Value *arg = data.Arg;
    Type *Ty = arg->getType()->getPointerElementType();
    Type *EltTy = dxilutil::GetArrayEltTy(Ty);
    // Skip object types and resource types.
    if (dxilutil::IsHLSLObjectType(EltTy) ||
        dxilutil::IsHLSLResourceType(EltTy))
      continue;

    unsigned size = DL.getTypeAllocSize(Ty);
    AllocaInst *temp = createAllocaForPatch(*CI->getParent()->getParent(), Ty);
    if (data.bCopyIn)
      copyIn(temp, arg, CI, size);
    if (data.bCopyOut)
      copyOut(temp, arg, CI, size);

    // Replace only the operand this work item was created for. Replacing
    // every use of `arg` in the call would also rewrite other parameters that
    // happen to receive the same pointer, each of which has its own work item
    // (and possibly a different in/out direction).
    CI->setArgOperand(data.ArgNo, temp);
  }
}
} // namespace
// Pass entry point.
// 1. For every annotated function definition, patch 'in' pointer parameters
//    that are stored to and 'out' pointer parameters that are loaded from.
// 2. Insert call-site copy-in/copy-out where required.
// Always reports the module as modified.
bool HLLegalizeParameter::runOnModule(Module &M) {
  HLModule &HLM = M.GetOrCreateHLModule();
  auto &typeSys = HLM.GetTypeSystem();
  const DataLayout &DL = M.getDataLayout();

  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    DxilFunctionAnnotation *FuncAnnot = HLM.GetFunctionAnnotation(&F);
    if (!FuncAnnot)
      continue;

    for (Argument &Arg : F.args()) {
      // Only pointer parameters can be written/read through.
      if (!Arg.getType()->isPointerTy())
        continue;

      // Object and resource parameters are left alone.
      Type *ElemTy = dxilutil::GetArrayEltTy(Arg.getType());
      if (dxilutil::IsHLSLObjectType(ElemTy) ||
          dxilutil::IsHLSLResourceType(ElemTy))
        continue;

      DxilParameterAnnotation &ParamAnnot =
          FuncAnnot->GetParameterAnnotation(Arg.getArgNo());
      const auto Qual = ParamAnnot.GetParamInputQual();
      const bool bIsIn = (Qual == DxilParamInputQual::In);
      const bool bIsOut = (Qual == DxilParamInputQual::Out);
      if (!bIsIn && !bIsOut)
        continue;

      // Load/store-only analysis of the argument's uses.
      hlutil::PointerStatus PS(&Arg, 0, /*bLdStOnly*/ true);
      PS.analyze(typeSys, /*bStructElt*/ false);

      if (bIsIn) {
        // 'in' parameter that is written inside the function.
        if (PS.HasStored())
          patchWriteOnInParam(F, Arg, DL);
      } else if (PS.HasLoaded()) {
        // 'out' parameter that is read inside the function.
        patchReadOnOutParam(F, Arg, DL);
      }
    }
  }

  // Copy-in copy-out for ptr args when needed.
  ParameterCopyInCopyOut(HLM);
  return true;
}
// Give a written-to 'in' parameter a private copy: all existing uses of Arg
// are redirected to a fresh alloca, and the alloca is filled from Arg right
// after it is created at the start of the function.
void HLLegalizeParameter::patchWriteOnInParam(Function &F, Argument &Arg,
                                              const DataLayout &DL) {
  Type *PointeeTy = Arg.getType()->getPointerElementType();
  AllocaInst *LocalCopy = createAllocaForPatch(F, PointeeTy);

  // Redirect the uses first, so the memcpy created below is the only
  // remaining user of Arg.
  Arg.replaceAllUsesWith(LocalCopy);

  IRBuilder<> CopyBuilder(LocalCopy->getNextNode());
  unsigned ByteSize = DL.getTypeAllocSize(PointeeTy);
  CopyBuilder.CreateMemCpy(LocalCopy, &Arg, ByteSize, 1);
}
// Give a read-from 'out' parameter a private buffer: all existing uses of Arg
// are redirected to a fresh alloca, and the alloca is copied back into Arg
// in front of every return instruction.
void HLLegalizeParameter::patchReadOnOutParam(Function &F, Argument &Arg,
                                              const DataLayout &DL) {
  Type *PointeeTy = Arg.getType()->getPointerElementType();
  AllocaInst *LocalCopy = createAllocaForPatch(F, PointeeTy);
  Arg.replaceAllUsesWith(LocalCopy);

  unsigned ByteSize = DL.getTypeAllocSize(PointeeTy);
  for (BasicBlock &Block : F) {
    ReturnInst *Ret = dyn_cast<ReturnInst>(Block.getTerminator());
    if (!Ret)
      continue;
    // Write the result back just before leaving the function.
    IRBuilder<> RetBuilder(Ret);
    RetBuilder.CreateMemCpy(&Arg, LocalCopy, ByteSize, 1);
  }
}
// Definition of the pass identification member that the constructor hands to
// ModulePass.
char HLLegalizeParameter::ID = 0;
// Factory: returns a newly allocated HLLegalizeParameter pass.
ModulePass *llvm::createHLLegalizeParameter() {
return new HLLegalizeParameter();
}
// Register the pass under the command-line name "hl-legalize-parameter".
INITIALIZE_PASS(HLLegalizeParameter, "hl-legalize-parameter",
"Legalize parameter", false, false)

196
lib/HLSL/HLUtil.cpp Normal file
Просмотреть файл

@ -0,0 +1,196 @@
///////////////////////////////////////////////////////////////////////////////
// //
// HLUtil.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// HL helper functions. //
// //
///////////////////////////////////////////////////////////////////////////////
#include "dxc/HLSL/HLUtil.h"
#include "dxc/HLSL/HLOperations.h"
#include "dxc/DXIL/DxilTypeSystem.h"
#include "dxc/Support/Global.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
using namespace llvm;
using namespace hlsl;
using namespace hlsl::hlutil;
namespace {
// Recursively walk the users of V and record in PS how the pointer is
// stored to and loaded from.
//
//   V          - pointer (or value derived from it by bitcast / GEP / HL
//                subscript) whose users are inspected.
//   PS         - status being accumulated.
//   typeSys    - used to tell annotated (user) functions from HL intrinsics.
//   bStructElt - true when V was reached through a struct-element GEP;
//                memcpys on such values are not collected.
//   bLdStOnly  - when true, stop descending as soon as both a load and a
//                store have been seen.
void analyzePointer(const Value *V, PointerStatus &PS, DxilTypeSystem &typeSys,
                    bool bStructElt, bool bLdStOnly) {
  // Early return when only load/store presence matters and both are known.
  if (bLdStOnly && PS.HasLoaded() && PS.HasStored())
    return;

  for (const User *U : V->users()) {
    // Track which function(s) access the pointer.
    if (const Instruction *I = dyn_cast<Instruction>(U)) {
      const Function *F = I->getParent()->getParent();
      if (!PS.AccessingFunction) {
        PS.AccessingFunction = F;
      } else if (F != PS.AccessingFunction) {
        PS.HasMultipleAccessingFunctions = true;
      }
    }

    if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(U)) {
      analyzePointer(BC, PS, typeSys, bStructElt, bLdStOnly);
    } else if (const MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
      // Do not collect memcpy on struct GEP use.
      // These memcpy will be flattened in next level.
      if (!bStructElt) {
        MemCpyInst *MI = const_cast<MemCpyInst *>(MC);
        PS.memcpySet.insert(MI);
        bool bFullCopy = false;
        if (ConstantInt *Length = dyn_cast<ConstantInt>(MC->getLength())) {
          bFullCopy = PS.Size == Length->getLimitedValue() || PS.Size == 0 ||
                      Length->getLimitedValue() == 0; // handle unbounded arrays
        }
        if (MC->getRawDest() == V) {
          if (bFullCopy &&
              PS.storedType == PointerStatus::StoredType::NotStored) {
            PS.storedType = PointerStatus::StoredType::MemcopyDestOnce;
            PS.StoringMemcpy = MI;
          } else {
            PS.MarkAsStored();
            PS.StoringMemcpy = nullptr;
          }
        } else if (MC->getRawSource() == V) {
          if (bFullCopy &&
              PS.loadedType == PointerStatus::LoadedType::NotLoaded) {
            PS.loadedType = PointerStatus::LoadedType::MemcopySrcOnce;
            PS.LoadingMemcpy = MI;
          } else {
            PS.MarkAsLoaded();
            PS.LoadingMemcpy = nullptr;
          }
        }
      } else {
        if (MC->getRawDest() == V) {
          PS.MarkAsStored();
        } else {
          DXASSERT(MC->getRawSource() == V, "must be source here");
          PS.MarkAsLoaded();
        }
      }
    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      gep_type_iterator GEPIt = gep_type_begin(GEP);
      gep_type_iterator GEPEnd = gep_type_end(GEP);
      // Skip pointer idx.
      GEPIt++;
      // Struct elt will be flattened in next level.
      const bool bGEPStructElt = (GEPIt != GEPEnd) && GEPIt->isStructTy();
      analyzePointer(GEP, PS, typeSys, bGEPStructElt, bLdStOnly);
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *StoredVal = SI->getOperand(0);
      if (PS.storedType == PointerStatus::StoredType::NotStored) {
        PS.storedType = PointerStatus::StoredType::StoredOnce;
        PS.StoredOnceValue = StoredVal;
      } else {
        PS.MarkAsStored();
      }
    } else if (isa<LoadInst>(U)) {
      PS.MarkAsLoaded();
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      Function *F = CI->getCalledFunction();
      if (!F) {
        // Not a direct call, so the callee cannot be classified. Be
        // conservative and treat the pointer as both written and read,
        // instead of dereferencing a null callee below.
        PS.MarkAsStored();
        PS.MarkAsLoaded();
        continue;
      }
      DxilFunctionAnnotation *annotation = typeSys.GetFunctionAnnotation(F);
      if (!annotation) {
        // No annotation: an HL intrinsic (or something unknown).
        HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
        switch (group) {
        case HLOpcodeGroup::HLMatLoadStore: {
          HLMatLoadStoreOpcode opcode =
              static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
          switch (opcode) {
          case HLMatLoadStoreOpcode::ColMatLoad:
          case HLMatLoadStoreOpcode::RowMatLoad:
            PS.MarkAsLoaded();
            break;
          case HLMatLoadStoreOpcode::ColMatStore:
          case HLMatLoadStoreOpcode::RowMatStore:
            PS.MarkAsStored();
            break;
          default:
            DXASSERT(0, "invalid opcode");
            PS.MarkAsStored();
            PS.MarkAsLoaded();
          }
        } break;
        case HLOpcodeGroup::HLSubscript: {
          HLSubscriptOpcode opcode =
              static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
          switch (opcode) {
          case HLSubscriptOpcode::VectorSubscript:
          case HLSubscriptOpcode::ColMatElement:
          case HLSubscriptOpcode::ColMatSubscript:
          case HLSubscriptOpcode::RowMatElement:
          case HLSubscriptOpcode::RowMatSubscript:
            // The subscript result is a pointer into V; follow its users.
            analyzePointer(CI, PS, typeSys, bStructElt, bLdStOnly);
            break;
          default:
            // Rest are resource ptr like buf[i].
            // Only read of resource handle.
            PS.MarkAsLoaded();
            break;
          }
        } break;
        default: {
          // If not sure whether it is an out param or not, take it as one.
          PS.MarkAsStored();
          PS.MarkAsLoaded();
        }
        }
        continue;
      }

      // Annotated (user) function: passing the pointer as an argument counts
      // as both a store and a load.
      unsigned argSize = F->arg_size();
      for (unsigned i = 0; i < argSize; i++) {
        Value *arg = CI->getArgOperand(i);
        if (V == arg) {
          // Do not replace struct arg.
          // Mark stored and loaded to disable replace.
          PS.MarkAsStored();
          PS.MarkAsLoaded();
        }
      }
    }
  }
}
}
namespace hlsl {
namespace hlutil {
// Run the pointer analysis on Ptr, accumulating the result in this object.
void PointerStatus::analyze(DxilTypeSystem &typeSys, bool bStructElt) {
  analyzePointer(Ptr, *this, typeSys, bStructElt, bLoadStoreOnly);
}

// Start from "never stored, never loaded, no accessing function seen".
PointerStatus::PointerStatus(llvm::Value *ptr, unsigned size, bool bLdStOnly)
    : storedType(StoredType::NotStored),
      loadedType(LoadedType::NotLoaded),
      StoredOnceValue(nullptr),
      StoringMemcpy(nullptr),
      LoadingMemcpy(nullptr),
      AccessingFunction(nullptr),
      HasMultipleAccessingFunctions(false),
      Size(size),
      Ptr(ptr),
      bLoadStoreOnly(bLdStOnly) {}

// Degrade to the untrackable "Stored" state; the single stored value (if any)
// is no longer meaningful.
void PointerStatus::MarkAsStored() {
  StoredOnceValue = nullptr;
  storedType = StoredType::Stored;
}

// Degrade to the untrackable "Loaded" state.
void PointerStatus::MarkAsLoaded() {
  loadedType = LoadedType::Loaded;
}

// A pointer counts as stored unless nothing, or only its initializer, was
// ever written to it.
bool PointerStatus::HasStored() {
  switch (storedType) {
  case StoredType::NotStored:
  case StoredType::InitializerStored:
    return false;
  default:
    return true;
  }
}

// A pointer counts as loaded as soon as any kind of read was recorded.
bool PointerStatus::HasLoaded() {
  const bool bNeverLoaded = (loadedType == LoadedType::NotLoaded);
  return !bNeverLoaded;
}
} // namespace hlutil
} // namespace hlsl

Просмотреть файл

@ -331,6 +331,8 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createDxilInsertPreservesPass()); // HLSL Change - insert preserve instructions
if (Inliner) {
MPM.add(createHLLegalizeParameter()); // HLSL Change - legalize parameters
// before inline.
MPM.add(Inliner);
Inliner = nullptr;
}
@ -375,6 +377,8 @@ void PassManagerBuilder::populateModulePassManager(
}
// HLSL Change Begins
MPM.add(createHLLegalizeParameter()); // legalize parameters before inline.
MPM.add(createAlwaysInlinerPass(/*InsertLifeTime*/false));
if (Inliner) {
delete Inliner;

Просмотреть файл

@ -59,6 +59,7 @@
#include "dxc/HLSL/HLMatrixType.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/HLSL/HLLowerUDT.h"
#include "dxc/HLSL/HLUtil.h"
#include <deque>
#include <unordered_map>
#include <unordered_set>
@ -3374,214 +3375,6 @@ bool SROA_Helper::DoScalarReplacement(GlobalVariable *GV,
return true;
}
struct PointerStatus {
/// Keep track of what stores to the pointer look like.
enum StoredType {
/// There is no store to this pointer. It can thus be marked constant.
NotStored,
/// This ptr is a global, and is stored to, but the only thing stored is the
/// constant it
/// was initialized with. This is only tracked for scalar globals.
InitializerStored,
/// This ptr is stored to, but only its initializer and one other value
/// is ever stored to it. If this global isStoredOnce, we track the value
/// stored to it in StoredOnceValue below. This is only tracked for scalar
/// globals.
StoredOnce,
/// This ptr is only assigned by a memcpy.
MemcopyDestOnce,
/// This ptr is stored to by multiple values or something else that we
/// cannot track.
Stored
} storedType;
/// Keep track of what loaded from the pointer look like.
enum LoadedType {
/// There is no load to this pointer. It can thus be marked constant.
NotLoaded,
/// This ptr is only used by a memcpy.
MemcopySrcOnce,
/// This ptr is loaded to by multiple instructions or something else that we
/// cannot track.
Loaded
} loadedType;
/// If only one value (besides the initializer constant) is ever stored to
/// this global, keep track of what value it is.
Value *StoredOnceValue;
/// Memcpy which this ptr is used.
std::unordered_set<MemCpyInst *> memcpySet;
/// Memcpy which use this ptr as dest.
MemCpyInst *StoringMemcpy;
/// Memcpy which use this ptr as src.
MemCpyInst *LoadingMemcpy;
/// These start out null/false. When the first accessing function is noticed,
/// it is recorded. When a second different accessing function is noticed,
/// HasMultipleAccessingFunctions is set to true.
const Function *AccessingFunction;
bool HasMultipleAccessingFunctions;
/// Size of the ptr.
unsigned Size;
/// Look at all uses of the global and fill in the GlobalStatus structure. If
/// the global has its address taken, return true to indicate we can't do
/// anything with it.
static void analyzePointer(const Value *V, PointerStatus &PS,
DxilTypeSystem &typeSys, bool bStructElt);
PointerStatus(unsigned size)
: storedType(StoredType::NotStored), loadedType(LoadedType::NotLoaded), StoredOnceValue(nullptr),
StoringMemcpy(nullptr), LoadingMemcpy(nullptr),
AccessingFunction(nullptr), HasMultipleAccessingFunctions(false),
Size(size) {}
void MarkAsStored() {
storedType = StoredType::Stored;
StoredOnceValue = nullptr;
}
void MarkAsLoaded() { loadedType = LoadedType::Loaded; }
};
void PointerStatus::analyzePointer(const Value *V, PointerStatus &PS,
DxilTypeSystem &typeSys, bool bStructElt) {
for (const User *U : V->users()) {
if (const Instruction *I = dyn_cast<Instruction>(U)) {
const Function *F = I->getParent()->getParent();
if (!PS.AccessingFunction) {
PS.AccessingFunction = F;
} else {
if (F != PS.AccessingFunction)
PS.HasMultipleAccessingFunctions = true;
}
}
if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(U)) {
analyzePointer(BC, PS, typeSys, bStructElt);
} else if (const MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
// Do not collect memcpy on struct GEP use.
// These memcpy will be flattened in next level.
if (!bStructElt) {
MemCpyInst *MI = const_cast<MemCpyInst *>(MC);
PS.memcpySet.insert(MI);
bool bFullCopy = false;
if (ConstantInt *Length = dyn_cast<ConstantInt>(MC->getLength())) {
bFullCopy = PS.Size == Length->getLimitedValue()
|| PS.Size == 0 || Length->getLimitedValue() == 0; // handle unbounded arrays
}
if (MC->getRawDest() == V) {
if (bFullCopy &&
PS.storedType == StoredType::NotStored) {
PS.storedType = StoredType::MemcopyDestOnce;
PS.StoringMemcpy = MI;
} else {
PS.MarkAsStored();
PS.StoringMemcpy = nullptr;
}
} else if (MC->getRawSource() == V) {
if (bFullCopy &&
PS.loadedType == LoadedType::NotLoaded) {
PS.loadedType = LoadedType::MemcopySrcOnce;
PS.LoadingMemcpy = MI;
} else {
PS.MarkAsLoaded();
PS.LoadingMemcpy = nullptr;
}
}
} else {
if (MC->getRawDest() == V) {
PS.MarkAsStored();
} else {
DXASSERT(MC->getRawSource() == V, "must be source here");
PS.MarkAsLoaded();
}
}
} else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
gep_type_iterator GEPIt = gep_type_begin(GEP);
gep_type_iterator GEPEnd = gep_type_end(GEP);
// Skip pointer idx.
GEPIt++;
// Struct elt will be flattened in next level.
bool bStructElt = (GEPIt != GEPEnd) && GEPIt->isStructTy();
analyzePointer(GEP, PS, typeSys, bStructElt);
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getOperand(0);
if (PS.storedType == StoredType::NotStored) {
PS.storedType = StoredType::StoredOnce;
PS.StoredOnceValue = V;
} else {
PS.MarkAsStored();
}
} else if (dyn_cast<LoadInst>(U)) {
PS.MarkAsLoaded();
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
Function *F = CI->getCalledFunction();
DxilFunctionAnnotation *annotation = typeSys.GetFunctionAnnotation(F);
if (!annotation) {
HLOpcodeGroup group = hlsl::GetHLOpcodeGroupByName(F);
switch (group) {
case HLOpcodeGroup::HLMatLoadStore: {
HLMatLoadStoreOpcode opcode =
static_cast<HLMatLoadStoreOpcode>(hlsl::GetHLOpcode(CI));
switch (opcode) {
case HLMatLoadStoreOpcode::ColMatLoad:
case HLMatLoadStoreOpcode::RowMatLoad:
PS.MarkAsLoaded();
break;
case HLMatLoadStoreOpcode::ColMatStore:
case HLMatLoadStoreOpcode::RowMatStore:
PS.MarkAsStored();
break;
default:
DXASSERT(0, "invalid opcode");
PS.MarkAsStored();
PS.MarkAsLoaded();
}
} break;
case HLOpcodeGroup::HLSubscript: {
HLSubscriptOpcode opcode =
static_cast<HLSubscriptOpcode>(hlsl::GetHLOpcode(CI));
switch (opcode) {
case HLSubscriptOpcode::VectorSubscript:
case HLSubscriptOpcode::ColMatElement:
case HLSubscriptOpcode::ColMatSubscript:
case HLSubscriptOpcode::RowMatElement:
case HLSubscriptOpcode::RowMatSubscript:
analyzePointer(CI, PS, typeSys, bStructElt);
break;
default:
// Rest are resource ptr like buf[i].
// Only read of resource handle.
PS.MarkAsLoaded();
break;
}
} break;
default: {
// If not sure its out param or not. Take as out param.
PS.MarkAsStored();
PS.MarkAsLoaded();
}
}
continue;
}
unsigned argSize = F->arg_size();
for (unsigned i = 0; i < argSize; i++) {
Value *arg = CI->getArgOperand(i);
if (V == arg) {
// Do not replace struct arg.
// Mark stored and loaded to disable replace.
PS.MarkAsStored();
PS.MarkAsLoaded();
}
}
}
}
}
static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder) {
for (auto it = C->user_begin(); it != C->user_end(); ) {
User *U = *(it++);
@ -3754,6 +3547,15 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
}
}
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Src)) {
// For const GV, if has stored, mark as non-constant.
if (GV->isConstant()) {
hlutil::PointerStatus PS(GV, 0, /*bLdStOnly*/ true);
PS.analyze(typeSys, /*bStructElt*/ false);
if (PS.HasStored())
GV->setConstant(false);
}
}
Value *RawDest = MC->getOperand(0);
Value *RawSrc = MC->getOperand(1);
MC->eraseFromParent();
@ -3899,16 +3701,17 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
// if MemcpyOnce, replace with dest with src if dest is not out param.
// else flat memcpy.
unsigned size = DL.getTypeAllocSize(Ty->getPointerElementType());
PointerStatus PS(size);
hlutil::PointerStatus PS(V, size, /*bLdStOnly*/ false);
const bool bStructElt = false;
bool bEltMemcpy = true;
PointerStatus::analyzePointer(V, PS, typeSys, bStructElt);
PS.analyze(typeSys, bStructElt);
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
if (PS.storedType == PointerStatus::StoredType::NotStored) {
PS.storedType = PointerStatus::StoredType::InitializerStored;
} else if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce) {
if (PS.storedType == hlutil::PointerStatus::StoredType::NotStored) {
PS.storedType = hlutil::PointerStatus::StoredType::InitializerStored;
} else if (PS.storedType ==
hlutil::PointerStatus::StoredType::MemcopyDestOnce) {
// For single mem store, if the store does not dominate all users.
// Mark it as Stored.
// In cases like:
@ -3920,27 +3723,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
if (isa<ConstantAggregateZero>(GV->getInitializer())) {
Instruction * Memcpy = PS.StoringMemcpy;
if (!ReplaceUseOfZeroInitBeforeDef(Memcpy, GV)) {
PS.storedType = PointerStatus::StoredType::Stored;
PS.storedType = hlutil::PointerStatus::StoredType::Stored;
}
}
} else {
PS.storedType = PointerStatus::StoredType::Stored;
PS.storedType = hlutil::PointerStatus::StoredType::Stored;
}
}
} else if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce) {
} else if (PS.storedType ==
hlutil::PointerStatus::StoredType::MemcopyDestOnce) {
// As above, it the memcpy doesn't dominate all its users,
// full replacement isn't possible without complicated PHI insertion
// This will likely replace with ld/st which will be replaced in mem2reg
Instruction *Memcpy = PS.StoringMemcpy;
if (!DominateAllUsers(Memcpy, V)) {
PS.storedType = PointerStatus::StoredType::Stored;
PS.storedType = hlutil::PointerStatus::StoredType::Stored;
// Replacing a memcpy with a memcpy with the same signature will just bring us back here
bEltMemcpy = false;
}
}
if (bAllowReplace && !PS.HasMultipleAccessingFunctions) {
if (PS.storedType == PointerStatus::StoredType::MemcopyDestOnce &&
if (PS.storedType == hlutil::PointerStatus::StoredType::MemcopyDestOnce &&
// Skip argument for input argument has input value, it is not dest once anymore.
!isa<Argument>(V)) {
// Replace with src of memcpy.
@ -3975,15 +3779,16 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
// Resource ptr should not be replaced.
// Need to make sure src not updated after current memcpy.
// Check Src only have 1 store now.
PointerStatus SrcPS(size);
PointerStatus::analyzePointer(Src, SrcPS, typeSys, bStructElt);
if (SrcPS.storedType != PointerStatus::StoredType::Stored) {
hlutil::PointerStatus SrcPS(Src, size, /*bLdStOnly*/ false);
SrcPS.analyze(typeSys, bStructElt);
if (SrcPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
ReplaceMemcpy(V, Src, MC, annotation, typeSys, DL);
return true;
}
}
}
} else if (PS.loadedType == PointerStatus::LoadedType::MemcopySrcOnce) {
} else if (PS.loadedType ==
hlutil::PointerStatus::LoadedType::MemcopySrcOnce) {
// Replace dst of memcpy.
MemCpyInst *MC = PS.LoadingMemcpy;
if (MC->getSourceAddressSpace() == MC->getDestAddressSpace()) {
@ -3998,9 +3803,9 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
!isa<BitCastOperator>(Dest)) {
// Need to make sure Dest not updated after current memcpy.
// Check Dest only have 1 store now.
PointerStatus DestPS(size);
PointerStatus::analyzePointer(Dest, DestPS, typeSys, bStructElt);
if (DestPS.storedType != PointerStatus::StoredType::Stored) {
hlutil::PointerStatus DestPS(Dest, size, /*bLdStOnly*/ false);
DestPS.analyze(typeSys, bStructElt);
if (DestPS.storedType != hlutil::PointerStatus::StoredType::Stored) {
ReplaceMemcpy(Dest, V, MC, annotation, typeSys, DL);
// V still need to be flatten.
// Lower memcpy come from Dest.
@ -6401,11 +6206,10 @@ void PatchDebugInfo(DebugInfoFinder &DbgFinder, Function *F, GlobalVariable *GV,
bool LowerStaticGlobalIntoAlloca::lowerStaticGlobalIntoAlloca(GlobalVariable *GV, const DataLayout &DL) {
DxilTypeSystem &typeSys = m_pHLModule->GetTypeSystem();
unsigned size = DL.getTypeAllocSize(GV->getType()->getElementType());
PointerStatus PS(size);
hlutil::PointerStatus PS(GV, size, /*bLdStOnly*/ false);
GV->removeDeadConstantUsers();
PS.analyzePointer(GV, PS, typeSys, /*bStructElt*/ false);
bool NotStored = (PS.storedType == PointerStatus::StoredType::NotStored) ||
(PS.storedType == PointerStatus::StoredType::InitializerStored);
PS.analyze(typeSys, /*bStructElt*/ false);
bool NotStored = !PS.HasStored();
// Make sure GV only used in one function.
// Skip GV which don't have store.
if (PS.HasMultipleAccessingFunctions || NotStored)

Просмотреть файл

@ -1578,6 +1578,11 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) {
// Construct annoation for this pointer.
ConstructFieldAttributedAnnotation(paramAnnotation, ThisTy,
bDefaultRowMajor);
if (MethodDecl->isConst()) {
paramAnnotation.SetParamInputQual(DxilParamInputQual::In);
} else {
paramAnnotation.SetParamInputQual(DxilParamInputQual::Inout);
}
}
}
@ -5393,7 +5398,6 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
bool isAggregateType = !isObject &&
(ParamTy->isArrayType() || ParamTy->isRecordType()) &&
!hlsl::IsHLSLVecMatType(ParamTy);
bool bInOut = Param->isModifierIn() && Param->isModifierOut();
bool EmitRValueAgg = false;
bool RValOnRef = false;
@ -5471,9 +5475,21 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
argLV = CGF.EmitLValue(Arg);
if (argLV.isSimple())
argAddr = argLV.getAddress();
// Skip copy-in copy-out for local variables.
if (bInOut && argAddr &&
(isa<AllocaInst>(argAddr) || isa<Argument>(argAddr))) {
// When an argument needs lowering (like a buffer/cbuffer load), it must be
// copied so the lowering does not happen on the argument when the callee is
// a noinline or extern function. That is done in HLLegalizeParameter, once
// it is known which functions are extern but before inlining.
bool bConstGlobal = false;
if (GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(argAddr)) {
bConstGlobal = m_ConstVarAnnotationMap.count(GV) | GV->isConstant();
}
// Skip copy-in copy-out when safe.
// The unsafe case is a global variable aliasing with a parameter: when the
// global variable is updated in the function, the parameter is updated
// silently. For a non-global variable or a constant global variable, it
// should be safe.
if (argAddr && (isa<AllocaInst>(argAddr) || isa<Argument>(argAddr) ||
bConstGlobal)) {
llvm::Type *ToTy = CGF.ConvertType(ParamTy.getNonReferenceType());
if (argAddr->getType()->getPointerElementType() == ToTy &&
// Check clang Type for case like int cast to unsigned.

Просмотреть файл

@ -12,13 +12,8 @@
// CHECK: i1 false
// CHECK: texture2DMSGetSamplePosition
// CHECK: getDimensions
// CHECK: llvm.dbg.value(metadata i32 %
// CHECK: llvm.dbg.value(metadata i32 %
// CHECK: getDimensions
// CHECK: llvm.dbg.value(metadata i32 %
// CHECK: llvm.dbg.value(metadata i32 %
// CHECK: getDimensions
// CHECK: llvm.dbg.value(metadata i32 %
// CHECK: getDimensions
// Exclude quoted source file (see readme)

Просмотреть файл

@ -0,0 +1,62 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Make sure static global a does not alias with local t.
// Store io before calling foo. Value should be 0.
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i8 1)
// CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 4, float 0.000000e+00, float undef, float undef, float undef, i8 1)
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 0, i32 8, i32 0, i32 undef, i32 undef, i32 undef, i8 1)
// store io after ++ in foo and after call foo. Value should be 1.
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
// CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 4, float 1.000000e+00, float undef, float undef, float undef, i8 1)
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 8, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
// CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 4, float 1.000000e+00, float undef, float undef, float undef, i8 1)
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 1, i32 8, i32 1, i32 undef, i32 undef, i32 undef, i8 1)
// Store a after bar. Value should be -1.
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 0, i32 -1, i32 undef, i32 undef, i32 undef, i8 1)
// CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 4, float -1.000000e+00, float undef, float undef, float undef, i8 1)
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 -1, i32 8, i32 -1, i32 undef, i32 undef, i32 undef, i8 1)
// Make sure return 3.
// CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 3.000000e+00)
struct ST {
int a;
float b;
uint c;
};
static ST a;
const static ST b = {0, 0, 0};
RWStructuredBuffer<ST> u;
// Increments every field of the inout struct, then writes the updated struct
// to u at the index given by the already-incremented io.a.
void foo(inout ST io) {
io.a++;
io.b++;
io.c++;
u[io.a] = io;
}
// Modifies the static global a while holding an inout parameter, and records
// io in u both before and after calling foo(io). The expectations at the top
// of this file require that io does not alias a.
void bar(inout ST io) {
// Decrement each field of the global; io must not observe these writes.
a.a--;
a.b--;
a.c--;
// Record io before foo changes it.
u[io.a] = io;
foo(io);
// Record io again after foo has incremented its fields.
u[io.a] = io;
}
// Pixel shader entry point: initializes the static global a from the constant
// b, copies it into local t, and passes t to bar. Afterwards it records a in
// u (indexed by a.a) and returns the sum of t's fields.
float main() : SV_Target {
a = b;
ST t = a;
bar(t);
u[a.a] = a;
return t.a + t.b + t.c;
}

Просмотреть файл

@ -0,0 +1,23 @@
// RUN: %dxc -E main -Tps_6_0 -fcgl %s | FileCheck %s
// Make sure no memcpy generated.
// CHECK:@main
// CHECK-NOT:memcpy
struct Data
{
float4 f[64];
};
cbuffer A {
Data a;
};
// Takes a whole Data struct as an input parameter and returns element i of
// its f array.
float4 foo(Data d, int i) {
return d.f[i];
}
// Pixel shader entry point: passes the cbuffer member a directly to foo.
// The test at the top of this file expects no memcpy to be emitted for this.
float4 main(int i:I) :SV_Target {
return foo(a, i);
}

Просмотреть файл

@ -4,10 +4,10 @@
//CHECK: tx0.s sampler NA NA S0 s0 1
//CHECK: tx1.s sampler NA NA S1 s1 1
//CHECK: s sampler NA NA S2 s3 1
//CHECK: tx0.t2 texture f32 2d T0 t1 1
//CHECK: tx0.t texture f32 2d T1 t0 1
//CHECK: tx1.t2 texture f32 2d T2 t6 1
//CHECK: tx1.t texture f32 2d T3 t5 1
//CHECK: tx0.t texture f32 2d T0 t0 1
//CHECK: tx0.t2 texture f32 2d T1 t1 1
//CHECK: tx1.t texture f32 2d T2 t5 1
//CHECK: tx1.t2 texture f32 2d T3 t6 1
//CHECK: x texture f32 2d T4 t3 1
struct LegacyTex

Просмотреть файл

@ -1965,6 +1965,7 @@ class db_dxil(object):
add_pass('hlsl-hlemit', 'HLEmitMetadata', 'HLSL High-Level Metadata Emit.', [])
add_pass("hl-expand-store-intrinsics", "HLExpandStoreIntrinsics", "Expand HLSL store intrinsics", [])
add_pass("hl-legalize-parameter", "HLLegalizeParameter", "Legalize parameter", [])
add_pass('scalarrepl-param-hlsl', 'SROA_Parameter_HLSL', 'Scalar Replacement of Aggregates HLSL (parameters)', [])
add_pass('scalarreplhlsl', 'SROA_DT_HLSL', 'Scalar Replacement of Aggregates HLSL (DT)', [])
add_pass('scalarreplhlsl-ssa', 'SROA_SSAUp_HLSL', 'Scalar Replacement of Aggregates HLSL (SSAUp)', [])