DirectXShaderCompiler/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp

604 строки
20 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// //
// DxilShaderAccessTracking.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Provides a pass to add instrumentation that tracks shader accesses to //
// resources. Used by PIX. //
// //
///////////////////////////////////////////////////////////////////////////////
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilInstructions.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
#include "dxc/HLSL/DxilGenerationPass.h"
#include "dxc/HLSL/DxilSpanAllocator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
#ifdef _WIN32
#include <winerror.h>
#endif
using namespace llvm;
using namespace hlsl;
// Throws an hlsl::Exception carrying E_INVALIDARG when `a` is true; returns
// normally otherwise.
void ThrowIf(bool a) {
  if (!a)
    return;
  throw ::hlsl::Exception(E_INVALIDARG);
}
//---------------------------------------------------------------------------------------------------------------------------------
// These types are taken from PIX's ShaderAccessHelpers.h
// Bit flags describing how a shader touched a resource.
enum class ShaderAccessFlags : uint32_t {
  None = 0x0,
  Read = 0x1,
  Write = 0x2,
  // "Counter" access is only applicable to UAVs; it means the counter buffer
  // attached to the UAV was accessed, but not necessarily the UAV resource.
  Counter = 0x4,
  // Descriptor-only read (if any), but not the resource contents (if any).
  // Used for GetDimensions, samplers, and secondary texture for sampler
  // feedback.
  // TODO: Make this a unique value if supported in PIX, then enable
  // GetDimensions
  // Until then it deliberately shares its value with Read.
  DescriptorRead = Read,
};
// Number of DWORDs reserved per tracked resource slot in the output buffer.
// OffsetFromAccess hands out DWORD offsets 0, 1 and 2 (Read, Write, Counter)
// within a slot.
constexpr uint32_t DWORDsPerResource = 3;
// Size of one DWORD in bytes; used to turn DWORD offsets into the byte
// offsets passed to the buffer store.
constexpr uint32_t BytesPerDWORD = 4;
// Maps an access kind to its DWORD offset within a resource's slot:
// Read -> 0, Write -> 1, Counter -> 2. Any other value (including None)
// throws hlsl::Exception(E_INVALIDARG). DescriptorRead currently has the same
// value as Read and therefore also maps to 0.
static uint32_t OffsetFromAccess(ShaderAccessFlags access) {
  if (access == ShaderAccessFlags::Read)
    return 0;
  if (access == ShaderAccessFlags::Write)
    return 1;
  if (access == ShaderAccessFlags::Counter)
    return 2;
  throw ::hlsl::Exception(E_INVALIDARG);
}
// Register categories exchanged with PIX. The values travel as ASCII
// characters (see ParseRegisterType / EncodeRegisterType), so the numeric
// values need not match PIX's enum; they are nevertheless kept identical to
// the same-named PIX enum to make the two code bases easy to compare.
enum class RegisterType {
  CBV = 0,
  SRV = 1,
  UAV = 2,
  RTV = 3, // not used.
  DSV = 4, // not used.
  Sampler = 5,
  SOV = 6, // not used.
  Invalid = 7,
  Terminator = 8
};
// Translates a DXIL resource class into the matching RegisterType.
// SRV, UAV, CBuffer, Sampler and Invalid map to their counterparts; any other
// class makes ThrowIf raise E_INVALIDARG.
RegisterType RegisterTypeFromResourceClass(DXIL::ResourceClass c) {
  switch (c) {
  case DXIL::ResourceClass::SRV:
    return RegisterType::SRV;
  case DXIL::ResourceClass::UAV:
    return RegisterType::UAV;
  case DXIL::ResourceClass::CBuffer:
    return RegisterType::CBV;
  case DXIL::ResourceClass::Sampler:
    return RegisterType::Sampler;
  case DXIL::ResourceClass::Invalid:
    return RegisterType::Invalid;
  default:
    break;
  }
  ThrowIf(true);
  // Not reached when ThrowIf throws; keeps every path returning a value.
  return RegisterType::Invalid;
}
struct RegisterTypeAndSpace {
bool operator<(const RegisterTypeAndSpace &o) const {
return static_cast<int>(Type) < static_cast<int>(o.Type) ||
(static_cast<int>(Type) == static_cast<int>(o.Type) &&
Space < o.Space);
}
RegisterType Type;
unsigned Space;
};
// Identifies a bind point as defined by the root signature
struct RSRegisterIdentifier {
  RegisterType Type;
  unsigned Space;
  unsigned Index;

  // Lexicographic ordering on (Type, Space, Index).
  // This must be a strict weak ordering because the struct is used as a
  // std::set key: requiring *all* fields to be smaller at once would make
  // distinct bind points (e.g. same type and space, different index) compare
  // equivalent, so the set would silently drop them.
  bool operator<(const RSRegisterIdentifier &o) const {
    if (Type != o.Type)
      return static_cast<unsigned>(Type) < static_cast<unsigned>(o.Type);
    if (Space != o.Space)
      return Space < o.Space;
    return Index < o.Index;
  }
};
// Slot assignment for one (register type, space) pair. Filled in by
// applyOptions from the "config" pass option and consumed by
// EmitResourceAccess.
struct SlotRange {
// First slot of the range; added to the resource index to locate the slot.
unsigned startSlot;
// Slot count for the range; used as the limit in the out-of-range checks.
unsigned numSlots;
// Number of slots needed if no descriptors from unbounded ranges are included
unsigned numInvariableSlots;
};
// Result of resolving a resource handle (see GetResourceFromHandle).
struct DxilResourceAndClass {
// Module resource record the handle refers to; null when the handle could not
// be resolved.
DxilResourceBase *resource;
// Index operand of the handle-creation call; null when unresolved.
Value *index;
// Resource class of the handle; Invalid when unresolved.
DXIL::ResourceClass resClass;
};
//---------------------------------------------------------------------------------------------------------------------------------
// Module pass that instruments DXIL resource accesses for PIX. Depending on
// the "checkForDynamicIndexing" option it either only reports whether any
// handle is created with a non-constant index, or it inserts stores into a
// tool-owned UAV for each tracked resource access.
class DxilShaderAccessTracking : public ModulePass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit DxilShaderAccessTracking() : ModulePass(ID) {}

  const char *getPassName() const override {
    return "DXIL shader access tracking";
  }

  // Runs the pass over M; the return value reports whether M was modified.
  bool runOnModule(Module &M) override;

  // Reads the "checkForDynamicIndexing" and "config" pass options.
  void applyOptions(PassOptions O) override;

private:
  // Emits a store of the value 1 into the tracking UAV at
  // ByteIndex + (DWORD offset for `access`) * BytesPerDWORD.
  void EmitAccess(LLVMContext &Ctx, OP *HlslOP, IRBuilder<> &Builder,
                  Value *ByteIndex, ShaderAccessFlags access);

  // Emits tracking code in front of `instruction` for an access to `res`.
  // Returns true if any code was emitted.
  bool EmitResourceAccess(DxilResourceAndClass &res, Instruction *instruction,
                          OP *HlslOP, LLVMContext &Ctx,
                          ShaderAccessFlags readWrite);

  // Selects the "report dynamic indexing" mode instead of instrumentation.
  bool m_CheckForDynamicIndexing = false;
  // Slot ranges parsed from the "config" option, keyed by type and space.
  std::map<RegisterTypeAndSpace, SlotRange> m_slotAssignments;
  // Handle to the tracking UAV created in each function that has a body.
  std::map<llvm::Function *, CallInst *> m_FunctionToUAVHandle;
  // Bind points that were accessed with a non-constant index.
  std::set<RSRegisterIdentifier> m_DynamicallyIndexedBindPoints;
};
// Consumes the run of ASCII decimal digits at the front of `q` and returns the
// number they spell. Returns 0 and consumes nothing when `q` is empty or does
// not start with a digit. Values too large for `unsigned` wrap around
// (ordinary unsigned arithmetic).
static unsigned DeserializeInt(std::deque<char> &q) {
  unsigned value = 0;
  while (!q.empty()) {
    const char c = q.front();
    // Explicit range test rather than isdigit(): passing a negative `char`
    // (any byte >= 0x80 where char is signed) to isdigit is undefined
    // behaviour.
    if (c < '0' || c > '9')
      break;
    value = value * 10 + static_cast<unsigned>(c - '0');
    q.pop_front();
  }
  return value;
}
// Removes and returns the first character of `q`. An empty queue makes
// ThrowIf raise E_INVALIDARG.
static char DequeFront(std::deque<char> &q) {
  ThrowIf(q.empty());
  const char head = q.front();
  q.pop_front();
  return head;
}
// Consumes one character from `q` and decodes it as a register type:
// 'C' = CBV, 'S' = SRV, 'U' = UAV, 'M' = Sampler, 'I' = Invalid. Any other
// character yields RegisterType::Terminator. An empty queue raises
// E_INVALIDARG (via DequeFront).
static RegisterType ParseRegisterType(std::deque<char> &q) {
  const char code = DequeFront(q);
  if (code == 'C')
    return RegisterType::CBV;
  if (code == 'S')
    return RegisterType::SRV;
  if (code == 'U')
    return RegisterType::UAV;
  if (code == 'M')
    return RegisterType::Sampler;
  if (code == 'I')
    return RegisterType::Invalid;
  return RegisterType::Terminator;
}
// Inverse of ParseRegisterType: returns the one-character code for `r`
// ('C', 'S', 'U', 'M' or 'I'), or '.' for any register type without a code.
static char EncodeRegisterType(RegisterType r) {
  char code = '.';
  switch (r) {
  case RegisterType::CBV:
    code = 'C';
    break;
  case RegisterType::SRV:
    code = 'S';
    break;
  case RegisterType::UAV:
    code = 'U';
    break;
  case RegisterType::Sampler:
    code = 'M';
    break;
  case RegisterType::Invalid:
    code = 'I';
    break;
  default:
    break;
  }
  return code;
}
// Consumes the expected delimiter `d` from the front of `q`.
// Raises E_INVALIDARG (via ThrowIf) when `q` is exhausted or its next
// character is not `d`; nothing is consumed in that case.
static void ValidateDelimiter(std::deque<char> &q, char d) {
  // Check for emptiness first: front() on an empty deque is undefined
  // behaviour, and a truncated config string would otherwise reach it.
  ThrowIf(q.empty() || q.front() != d);
  q.pop_front();
}
// Reads the pass options.
//  - "checkForDynamicIndexing": non-zero selects the dynamic-indexing check
//    mode (defaults to 0).
//  - "config": serialized slot assignments, a sequence of records of the form
//      <type char><space>:<startSlot>:<numSlots>i<numInvariableSlots>;
//    followed by a character that ParseRegisterType decodes as Terminator.
//    Malformed input raises E_INVALIDARG through the parsing helpers.
void DxilShaderAccessTracking::applyOptions(PassOptions O) {
  int dynamicIndexingOption = 0;
  GetPassOptionInt(O, "checkForDynamicIndexing", &dynamicIndexingOption, 0);
  m_CheckForDynamicIndexing = (dynamicIndexingOption != 0);

  StringRef serializedConfig;
  if (!GetPassOption(O, "config", &serializedConfig))
    return;

  std::deque<char> pending(serializedConfig.begin(), serializedConfig.end());

  // Parse slot assignments. Compare with PIX's ShaderAccessHelpers.cpp
  // (TrackingConfiguration::SerializedRepresentation)
  for (RegisterType type = ParseRegisterType(pending);
       type != RegisterType::Terminator; type = ParseRegisterType(pending)) {
    RegisterTypeAndSpace key;
    key.Type = type;
    key.Space = DeserializeInt(pending);
    ValidateDelimiter(pending, ':');

    SlotRange range;
    range.startSlot = DeserializeInt(pending);
    ValidateDelimiter(pending, ':');
    range.numSlots = DeserializeInt(pending);
    ValidateDelimiter(pending, 'i');
    range.numInvariableSlots = DeserializeInt(pending);
    ValidateDelimiter(pending, ';');

    m_slotAssignments[key] = range;
  }
}
// Emits, at Builder's insertion point, a BufferStore that writes the value 1
// into the tracking UAV of the enclosing function.
//   ByteIndex - byte offset of the resource's slot in the tracking UAV.
//   access    - selects the DWORD within the slot (see OffsetFromAccess).
// Throws E_INVALIDARG for access kinds OffsetFromAccess rejects, and
// std::out_of_range if no tracking handle was registered for the function
// containing the insertion point.
void DxilShaderAccessTracking::EmitAccess(LLVMContext &Ctx, OP *HlslOP,
                                          IRBuilder<> &Builder,
                                          Value *ByteIndex,
                                          ShaderAccessFlags access) {
  llvm::Type *const Int32Ty = Type::getInt32Ty(Ctx);

  // Byte offset, within the slot, of the DWORD recording this access kind.
  const unsigned AccessByteOffset =
      static_cast<unsigned>(OffsetFromAccess(access) * BytesPerDWORD);
  Value *const TargetByteOffset = Builder.CreateAdd(
      ByteIndex, HlslOP->GetU32Const(AccessByteOffset), "OffsetByteIndex");

  Function *const BufferStoreFn =
      HlslOP->GetOpFunc(OP::OpCode::BufferStore, Int32Ty);
  Constant *const BufferStoreOpcode =
      HlslOP->GetU32Const((unsigned)OP::OpCode::BufferStore);
  CallInst *const TrackingUAVHandle =
      m_FunctionToUAVHandle.at(Builder.GetInsertBlock()->getParent());

  UndefValue *const Unused = UndefValue::get(Int32Ty);
  Constant *const StoredValue = HlslOP->GetU32Const(1);
  Constant *const WriteMask = HlslOP->GetI8Const(1);

  (void)Builder.CreateCall(
      BufferStoreFn,
      {
          BufferStoreOpcode, // i32, ; opcode
          TrackingUAVHandle, // %dx.types.Handle, ; resource handle
          TargetByteOffset,  // i32, ; coordinate c0: byte offset
          Unused,            // i32, ; coordinate c1 (unused)
          StoredValue,       // i32, ; value v0
          Unused,            // i32, ; value v1
          Unused,            // i32, ; value v2
          Unused,            // i32, ; value v3
          WriteMask          // i8 ; just the first value is used
      });
}
// Emits tracking code, immediately before `instruction`, that records an
// access of kind `readWrite` to the resource described by `res`.
//
// The target slot is startSlot + index for the resource's (type, space) pair.
// Out-of-range indices are redirected to byte offset 0 of the tracking UAV.
// For a non-constant index the bounds check is emitted as IR and the bind
// point is remembered in m_DynamicallyIndexedBindPoints.
//
// Returns true if code was emitted (module modified), false if `res` is
// unresolved or no slot range is configured for its type and space.
bool DxilShaderAccessTracking::EmitResourceAccess(DxilResourceAndClass &res,
                                                  Instruction *instruction,
                                                  OP *HlslOP, LLVMContext &Ctx,
                                                  ShaderAccessFlags readWrite) {
  // GetResourceFromHandle leaves `resource` null when it cannot resolve the
  // handle (e.g. dynamic rangeId); there is nothing to attribute the access
  // to, and dereferencing it below would crash.
  if (res.resource == nullptr)
    return false;

  RegisterTypeAndSpace typeAndSpace{RegisterTypeFromResourceClass(res.resClass),
                                    res.resource->GetSpaceID()};
  auto slot = m_slotAssignments.find(typeAndSpace);
  // If the assignment isn't found, we assume it's not accessed
  if (slot == m_slotAssignments.end())
    return false; // did not modify

  const SlotRange &range = slot->second;
  IRBuilder<> Builder(instruction);
  Value *slotIndex;
  if (isa<ConstantInt>(res.index)) {
    unsigned index = cast<ConstantInt>(res.index)->getLimitedValue();
    // Valid indices are [0, numSlots). Use >= so that index == numSlots is
    // rejected too, matching the unsigned >= test on the dynamic path below.
    if (index >= range.numSlots) {
      // out-of-range accesses are written to slot zero:
      slotIndex = HlslOP->GetU32Const(0);
    } else {
      slotIndex = HlslOP->GetU32Const((range.startSlot + index) *
                                      DWORDsPerResource * BytesPerDWORD);
    }
  } else {
    RSRegisterIdentifier id{typeAndSpace.Type, typeAndSpace.Space,
                            res.resource->GetID()};
    m_DynamicallyIndexedBindPoints.emplace(std::move(id));

    // CompareWithSlotLimit will contain 1 if the access is out-of-bounds
    // (both over- and under-flow via the unsigned >= with slot count)
    auto CompareWithSlotLimit = Builder.CreateICmpUGE(
        res.index, HlslOP->GetU32Const(range.numSlots),
        "CompareWithSlotLimit");
    auto CompareWithSlotLimitAsUint = Builder.CreateCast(
        Instruction::CastOps::ZExt, CompareWithSlotLimit,
        Type::getInt32Ty(Ctx), "CompareWithSlotLimitAsUint");

    // IsInBounds will therefore contain 0 if the access is out-of-bounds, and
    // 1 otherwise.
    auto IsInBounds = Builder.CreateSub(
        HlslOP->GetU32Const(1), CompareWithSlotLimitAsUint, "IsInBounds");

    auto SlotDwordOffset = Builder.CreateAdd(
        res.index, HlslOP->GetU32Const(range.startSlot), "SlotDwordOffset");
    auto SlotByteOffset = Builder.CreateMul(
        SlotDwordOffset,
        HlslOP->GetU32Const(DWORDsPerResource * BytesPerDWORD),
        "SlotByteOffset");

    // This will drive an out-of-bounds access slot down to 0
    slotIndex = Builder.CreateMul(SlotByteOffset, IsInBounds, "slotIndex");
  }

  EmitAccess(Ctx, HlslOP, Builder, slotIndex, readWrite);
  return true; // did modify
}
// Resolves a handle value to the module resource it was created from.
//   resHandle - must be a CallInst; it is interpreted as a createHandle call.
//   DM        - module whose SRV/UAV/CBuffer/Sampler lists are consulted.
// Returns the resource record, the handle's index operand and its class.
// When the rangeId operand is not a constant, or the resource class is not
// one of SRV/UAV/CBuffer/Sampler, the result has a null resource, a null
// index and class Invalid.
DxilResourceAndClass GetResourceFromHandle(Value *resHandle, DxilModule &DM) {
  DxilResourceAndClass unresolved{nullptr, nullptr,
                                  DXIL::ResourceClass::Invalid};

  DxilInst_CreateHandle handleInst(cast<CallInst>(resHandle));

  // Dynamic rangeId is not supported - skip and let validation report the
  // error.
  Value *rangeIdOperand = handleInst.get_rangeId();
  if (!isa<ConstantInt>(rangeIdOperand))
    return unresolved;
  const unsigned rangeId =
      cast<ConstantInt>(rangeIdOperand)->getLimitedValue();

  const auto resClass =
      static_cast<DXIL::ResourceClass>(handleInst.get_resourceClass_val());

  DxilResourceBase *resource = nullptr;
  switch (resClass) {
  case DXIL::ResourceClass::SRV:
    resource = &DM.GetSRV(rangeId);
    break;
  case DXIL::ResourceClass::UAV:
    resource = &DM.GetUAV(rangeId);
    break;
  case DXIL::ResourceClass::CBuffer:
    resource = &DM.GetCBuffer(rangeId);
    break;
  case DXIL::ResourceClass::Sampler:
    resource = &DM.GetSampler(rangeId);
    break;
  default:
    DXASSERT(0, "invalid res class");
    return unresolved;
  }

  return DxilResourceAndClass{resource, handleInst.get_index(), resClass};
}
// Entry point of the pass.
//
// In "check for dynamic indexing" mode the module is only inspected: if any
// CreateHandle call uses a non-constant index, "FoundDynamicIndexing" is
// written to OSOverride (when set).
//
// Otherwise the pass:
//  1. writes "ShouldAssumeDsvAccess" to OSOverride if the shader forces early
//     depth/stencil;
//  2. adds a tracking UAV (in the reserved-for-tools register space) and a
//     handle to it at the top of every function that has a body;
//  3. instruments every call to a DXIL op that takes resource handles, via
//     EmitResourceAccess;
//  4. writes the list of dynamically indexed bind points to OSOverride.
//
// Returns true if the module was modified.
bool DxilShaderAccessTracking::runOnModule(Module &M) {
  // This pass adds instrumentation for shader access to resources
  DxilModule &DM = M.GetOrCreateDxilModule();
  LLVMContext &Ctx = M.getContext();
  OP *HlslOP = DM.GetOP();

  // This is the reserved-for-tools register space
  const unsigned ToolsRegisterSpace = static_cast<unsigned>(-2);

  bool Modified = false;

  if (m_CheckForDynamicIndexing) {
    bool FoundDynamicIndexing = false;

    auto CreateHandleFn =
        HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
    auto CreateHandleUses = CreateHandleFn->uses();
    for (auto FI = CreateHandleUses.begin(); FI != CreateHandleUses.end();) {
      auto &FunctionUse = *FI++;
      auto FunctionUser = FunctionUse.getUser();
      auto instruction = cast<Instruction>(FunctionUser);
      Value *index = instruction->getOperand(3);
      if (!isa<Constant>(index)) {
        FoundDynamicIndexing = true;
        break;
      }
    }

    if (FoundDynamicIndexing) {
      if (OSOverride != nullptr) {
        formatted_raw_ostream FOS(*OSOverride);
        FOS << "FoundDynamicIndexing";
      }
    }
  } else {
    {
      if (DM.m_ShaderFlags.GetForceEarlyDepthStencil()) {
        if (OSOverride != nullptr) {
          formatted_raw_ostream FOS(*OSOverride);
          FOS << "ShouldAssumeDsvAccess";
        }
      }

      for (llvm::Function &F : M.functions()) {
        if (!F.getBasicBlockList().empty()) {
          IRBuilder<> Builder(F.getEntryBlock().getFirstInsertionPt());

          unsigned int UAVResourceHandle =
              static_cast<unsigned int>(DM.GetUAVs().size());

          // Set up a UAV with structure of a single int
          SmallVector<llvm::Type *, 1> Elements{Type::getInt32Ty(Ctx)};
          llvm::StructType *UAVStructTy =
              llvm::StructType::create(Elements, "class.RWStructuredBuffer");
          std::unique_ptr<DxilResource> pUAV =
              llvm::make_unique<DxilResource>();
          pUAV->SetGlobalName("PIX_CountUAVName");
          pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo()));
          pUAV->SetID(UAVResourceHandle);
          pUAV->SetSpaceID(ToolsRegisterSpace);
          pUAV->SetSampleCount(1);
          pUAV->SetGloballyCoherent(false);
          pUAV->SetHasCounter(false);
          pUAV->SetCompType(CompType::getI32());
          pUAV->SetLowerBound(0);
          pUAV->SetRangeSize(1);
          pUAV->SetKind(DXIL::ResourceKind::RawBuffer);

          auto pAnnotation =
              DM.GetTypeSystem().GetStructAnnotation(UAVStructTy);
          if (pAnnotation == nullptr) {
            pAnnotation = DM.GetTypeSystem().AddStructAnnotation(UAVStructTy);
            pAnnotation->GetFieldAnnotation(0).SetCBufferOffset(0);
            pAnnotation->GetFieldAnnotation(0).SetCompType(
                hlsl::DXIL::ComponentType::I32);
            pAnnotation->GetFieldAnnotation(0).SetFieldName("count");
          }

          // Keep the id in a local of the right width. Assigning it to `ID`
          // would overwrite the pass's static `char ID` identifier and
          // truncate the value to a char.
          const unsigned UAVRangeId = DM.AddUAV(std::move(pUAV));
          assert(UAVRangeId == UAVResourceHandle);

          // Create handle for the newly-added UAV
          Function *CreateHandleOpFunc = HlslOP->GetOpFunc(
              DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx));
          Constant *CreateHandleOpcodeArg = HlslOP->GetU32Const(
              static_cast<unsigned>(DXIL::OpCode::CreateHandle));
          Constant *UAVArg = HlslOP->GetI8Const(
              static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
                  DXIL::ResourceClass::UAV));
          Constant *MetaDataArg = HlslOP->GetU32Const(
              UAVRangeId); // position of the metadata record in the
                           // corresponding metadata list
          Constant *IndexArg = HlslOP->GetU32Const(0); //
          Constant *FalseArg =
              HlslOP->GetI1Const(0); // non-uniform resource index: false
          m_FunctionToUAVHandle[&F] = Builder.CreateCall(
              CreateHandleOpFunc,
              {CreateHandleOpcodeArg, UAVArg, MetaDataArg, IndexArg, FalseArg},
              "PIX_CountUAV_Handle");

          // A resource and a call were just added, so the module has changed
          // even if no access ends up being instrumented below.
          Modified = true;
        }
      }
      DM.ReEmitDxilResources();
    }

    for (llvm::Function &F : M.functions()) {
      // Only consider DXIL intrinsics that are actually used:
      if (!F.isDeclaration() || F.isIntrinsic() || F.use_empty() ||
          !OP::IsDxilOpFunc(&F))
        continue;

      // Gather handle parameter indices, if any
      FunctionType *fnTy =
          cast<FunctionType>(F.getType()->getPointerElementType());
      SmallVector<unsigned, 4> handleParams;
      for (unsigned iParam = 1; iParam < fnTy->getFunctionNumParams();
           ++iParam) {
        if (fnTy->getParamType(iParam) == HlslOP->GetHandleType())
          handleParams.push_back(iParam);
      }
      if (handleParams.empty())
        continue;

      // The iterator is advanced before the use is processed because the
      // instrumentation below inserts new calls while this list is walked.
      auto FunctionUses = F.uses();
      for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
        auto &FunctionUse = *FI++;
        auto FunctionUser = FunctionUse.getUser();
        auto Call = cast<CallInst>(FunctionUser);
        auto opCode = OP::GetDxilOpFuncCallInst(Call);

        // Base Read/Write on function attribute - should match for all normal
        // resource operations
        ShaderAccessFlags readWrite = ShaderAccessFlags::Write;
        if (OP::GetMemAccessAttr(opCode) == llvm::Attribute::AttrKind::ReadOnly)
          readWrite = ShaderAccessFlags::Read;

        // Special cases
        switch (opCode) {
        case DXIL::OpCode::GetDimensions:
          // readWrite = ShaderAccessFlags::DescriptorRead; // TODO: Support
          // GetDimensions
          continue;
        case DXIL::OpCode::BufferUpdateCounter:
          readWrite = ShaderAccessFlags::Counter;
          break;
        case DXIL::OpCode::TraceRay:
        case DXIL::OpCode::RayQuery_TraceRayInline:
          // Read of AccelerationStructure; doesn't match function attribute
          // readWrite = ShaderAccessFlags::Read; // TODO: Support
          // TraceRay[Inline]
          continue;
        default:
          break;
        }

        for (unsigned iParam : handleParams) {
          auto res = GetResourceFromHandle(Call->getArgOperand(iParam), DM);
          // Don't instrument the accesses to the UAV that we just added
          if (res.resClass == DXIL::ResourceClass::UAV &&
              res.resource->GetSpaceID() == ToolsRegisterSpace) {
            break;
          }
          // An unresolved handle comes back with a null resource; skip it
          // rather than passing a null resource on to EmitResourceAccess.
          if (res.resource != nullptr &&
              EmitResourceAccess(res, Call, HlslOP, Ctx, readWrite)) {
            Modified = true;
          }
          // Remaining resources are DescriptorRead.
          readWrite = ShaderAccessFlags::DescriptorRead;
        }
      }
    }

    if (OSOverride != nullptr) {
      formatted_raw_ostream FOS(*OSOverride);
      FOS << "DynamicallyIndexedBindPoints=";
      for (auto const &bp : m_DynamicallyIndexedBindPoints) {
        FOS << EncodeRegisterType(bp.Type) << bp.Space << ':' << bp.Index
            << ';';
      }
      FOS << ".";
    }
  }

  return Modified;
}
// Definition of the pass's static identifier member (declared in the class as
// "Pass identification, replacement for typeid").
char DxilShaderAccessTracking::ID = 0;
// Factory function: returns a newly heap-allocated DxilShaderAccessTracking
// pass. NOTE(review): presumably ownership passes to whoever adds it to a
// pass manager - confirm against callers.
ModulePass *llvm::createDxilShaderAccessTrackingPass() {
return new DxilShaderAccessTracking();
}
// Registers the pass under the command-line name
// "hlsl-dxil-pix-shader-access-instrumentation".
INITIALIZE_PASS(DxilShaderAccessTracking,
"hlsl-dxil-pix-shader-access-instrumentation",
"HLSL DXIL shader access tracking for PIX", false, false)