391 строка
14 KiB
C++
391 строка
14 KiB
C++
///////////////////////////////////////////////////////////////////////////////
//                                                                           //
// DxilPIXMeshShaderOutputInstrumentation.cpp                                //
// Copyright (C) Microsoft Corporation. All rights reserved.                 //
// This file is distributed under the University of Illinois Open Source     //
// License. See LICENSE.TXT for details.                                     //
//                                                                           //
// Provides a pass to add instrumentation to retrieve mesh shader output.    //
// Used by PIX.                                                              //
//                                                                           //
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include "dxc/DXIL/DxilOperations.h"
|
|
#include "dxc/DXIL/DxilUtil.h"
|
|
|
|
#include "dxc/DXIL/DxilInstructions.h"
|
|
#include "dxc/DXIL/DxilModule.h"
|
|
#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
|
|
#include "dxc/HLSL/DxilGenerationPass.h"
|
|
#include "dxc/HLSL/DxilSpanAllocator.h"
|
|
|
|
#include "llvm/IR/PassManager.h"
|
|
#include "llvm/Support/FormattedStream.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
#include <deque>
|
|
|
|
#ifdef _WIN32
|
|
#include <winerror.h>
|
|
#endif
|
|
|
|
// Keep these in sync with the same-named value in the debugger application's
// WinPixShaderUtils.h

// Number of bytes subtracted from the UAV size to locate the "dumping ground"
// region at the tail of the debug buffer.
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;

// Byte distance from the start of the dumping ground to the atomic offset
// counter. The actual max size per record is much smaller than this, but it
// never hurts to be generous.
constexpr size_t CounterOffsetBeyondUsefulData =
    DebugBufferDumpingGroundSize / 2;

// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
// Each record written by this pass begins with one of these tags.
constexpr uint32_t triangleIndexIndicator = 1; // EmitIndices record
constexpr uint32_t int32ValueIndicator = 2;    // StoreVertexOutput, i32
constexpr uint32_t floatValueIndicator = 3;    // StoreVertexOutput, float
constexpr uint32_t int16ValueIndicator = 4;    // StoreVertexOutput, i16
constexpr uint32_t float16ValueIndicator = 5;  // StoreVertexOutput, half
|
|
|
|
using namespace llvm;
|
|
using namespace hlsl;
|
|
|
|
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass
|
|
{
|
|
public:
|
|
static char ID; // Pass identification, replacement for typeid
|
|
explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
|
|
const char *getPassName() const override {
|
|
return "DXIL mesh shader output instrumentation";
|
|
}
|
|
void applyOptions(PassOptions O) override;
|
|
bool runOnModule(Module &M) override;
|
|
|
|
private:
|
|
CallInst *m_OutputUAV = nullptr;
|
|
int m_RemainingReservedSpaceInBytes = 0;
|
|
Constant *m_OffsetMask = nullptr;
|
|
|
|
uint64_t m_UAVSize = 1024 * 1024;
|
|
|
|
struct BuilderContext {
|
|
Module &M;
|
|
DxilModule &DM;
|
|
LLVMContext &Ctx;
|
|
OP *HlslOP;
|
|
IRBuilder<> &Builder;
|
|
};
|
|
|
|
CallInst *addUAV(BuilderContext &BC);
|
|
Value *insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC);
|
|
Value *insertInstructionsToCalculateGroupIdZ(BuilderContext &BC);
|
|
Value *reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes);
|
|
uint32_t UAVDumpingGroundOffset();
|
|
Value *writeDwordAndReturnNewOffset(BuilderContext &BC, Value *TheOffset,
|
|
Value *TheValue);
|
|
template <typename... T> void Instrument(BuilderContext &BC, T... values);
|
|
};
|
|
|
|
// Picks up the pass options. Only "UAVSize" is consulted; its value is
// stored in m_UAVSize.
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O)
{
  // Handed to GetPassOptionUInt64 as its default-value argument (presumably
  // used when the option is absent; same value as m_UAVSize's initializer).
  constexpr uint64_t DefaultUAVSize = 1024 * 1024;

  GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, DefaultUAVSize);
}
|
|
|
|
// Returns the byte offset at which the dumping ground begins, i.e. the UAV
// size minus DebugBufferDumpingGroundSize, truncated to 32 bits.
// NOTE(review): no check is made that m_UAVSize is at least
// DebugBufferDumpingGroundSize, nor that the result fits in 32 bits.
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
{
  const uint64_t OffsetInBytes = m_UAVSize - DebugBufferDumpingGroundSize;
  return static_cast<uint32_t>(OffsetInBytes);
}
|
|
|
|
// Adds a new raw-buffer UAV (named "PIX_DebugUAVName") to the DxilModule and
// emits, at BC.Builder's insertion point, the CreateHandle call for it.
// Returns that CreateHandle call.
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC)
{
  DxilModule &DM = BC.DM;
  OP *const HlslOP = BC.HlslOP;

  // The resource is given an ID equal to the current number of UAVs.
  const unsigned int NewUAVId = static_cast<unsigned int>(DM.GetUAVs().size());

  // Element type of the UAV: a struct holding a single int.
  SmallVector<llvm::Type *, 1> StructMembers{Type::getInt32Ty(BC.Ctx)};
  llvm::StructType *const DebugUAVTy =
      llvm::StructType::create(StructMembers, "PIX_DebugUAV_Type");

  auto DebugUAV = llvm::make_unique<DxilResource>();
  DebugUAV->SetGlobalName("PIX_DebugUAVName");
  DebugUAV->SetGlobalSymbol(UndefValue::get(DebugUAVTy->getPointerTo()));
  DebugUAV->SetID(NewUAVId);
  // This is the reserved-for-tools register space
  DebugUAV->SetSpaceID(static_cast<unsigned int>(-2));
  DebugUAV->SetSampleCount(1);
  DebugUAV->SetGloballyCoherent(false);
  DebugUAV->SetHasCounter(false);
  DebugUAV->SetCompType(CompType::getI32());
  DebugUAV->SetLowerBound(0);
  DebugUAV->SetRangeSize(1);
  DebugUAV->SetKind(DXIL::ResourceKind::RawBuffer);
  DebugUAV->SetRW(true);

  const auto ID = DM.AddUAV(std::move(DebugUAV));
  assert(ID == NewUAVId);

  DM.m_ShaderFlags.SetEnableRawAndStructuredBuffers(true);

  // Create handle for the newly-added UAV
  Function *const CreateHandleFn =
      HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(BC.Ctx));

  Constant *const OpcodeArg =
      HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::CreateHandle));
  Constant *const ResourceClassArg = HlslOP->GetI8Const(
      static_cast<std::underlying_type<DxilResourceBase::Class>::type>(
          DXIL::ResourceClass::UAV));
  // Position of the metadata record in the corresponding metadata list
  Constant *const MetadataIndexArg = HlslOP->GetU32Const(ID);
  Constant *const IndexArg = HlslOP->GetU32Const(0);
  // Non-uniform resource index: false
  Constant *const NonUniformArg = HlslOP->GetI1Const(0);

  return BC.Builder.CreateCall(CreateHandleFn,
                               {OpcodeArg, ResourceClassArg, MetadataIndexArg,
                                IndexArg, NonUniformArg},
                               "PIX_DebugUAV_Handle");
}
|
|
|
|
// Emits GroupId calls for components 0 (x) and 1 (y) and returns a single
// 32-bit value computed as (y << 16) + x.
Value *DxilPIXMeshShaderOutputInstrumentation::
    insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
{
  OP *const HlslOP = BC.HlslOP;

  Constant *const ComponentX = HlslOP->GetU32Const(0);
  Constant *const ComponentY = HlslOP->GetU32Const(1);

  Function *const GroupIdFn =
      HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
  Constant *const GroupIdOpcode =
      HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::GroupId));

  CallInst *const X =
      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentX}, "GroupIdX");
  CallInst *const Y =
      BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentY}, "GroupIdY");

  // Spec requires that no group id index is greater than 64k, so we can
  // combine two into one 32-bit value: y goes in the upper 16 bits.
  const uint64_t BitsForX = 16;
  Value *const YInUpperHalf = BC.Builder.CreateShl(Y, BitsForX);
  return BC.Builder.CreateAdd(YInUpperHalf, X);
}
|
|
|
|
// Emits a GroupId call for component 2 (z) at BC.Builder's insertion point
// and returns it.
Value *DxilPIXMeshShaderOutputInstrumentation::
    insertInstructionsToCalculateGroupIdZ(BuilderContext &BC)
{
  OP *const HlslOP = BC.HlslOP;

  Constant *const ComponentZ = HlslOP->GetU32Const(2);
  Function *const GroupIdFn =
      HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
  Constant *const GroupIdOpcode =
      HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::GroupId));

  return BC.Builder.CreateCall(GroupIdFn, {GroupIdOpcode, ComponentZ},
                               "GroupIdZ");
}
|
|
|
|
// Emits an atomic add on the debug UAV's offset counter that claims
// SpaceInBytes bytes for one record.
//
// BC           - builder context; instructions are emitted at BC.Builder's
//                insertion point.
// SpaceInBytes - size of the record about to be written. The caller must
//                follow up with exactly SpaceInBytes / 4 calls to
//                writeDwordAndReturnNewOffset.
//
// Returns the counter's previous value ANDed with m_OffsetMask, i.e. the byte
// offset at which the caller should start writing.
//
// Requires m_OutputUAV and m_OffsetMask to have been set (see runOnModule).
Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
    BuilderContext &BC, uint32_t SpaceInBytes)
{
  // Check the previous caller wrote everything it reserved (i.e. it didn't
  // reserve too much space):
  assert(m_RemainingReservedSpaceInBytes == 0);

  // Check that the caller didn't ask for so much memory that it will
  // overwrite the offset counter. This must test the *requested* size: the
  // bookkeeping member was just asserted to be zero, so testing it (as this
  // code used to) could never fail.
  assert(SpaceInBytes < CounterOffsetBeyondUsefulData);

  m_RemainingReservedSpaceInBytes = static_cast<int>(SpaceInBytes);

  // Insert the UAV increment instruction:
  Function *AtomicOpFunc =
      BC.HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(BC.Ctx));
  Constant *AtomicBinOpcode =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(OP::OpCode::AtomicBinOp));
  Constant *AtomicAdd =
      BC.HlslOP->GetU32Const(static_cast<unsigned>(DXIL::AtomicBinOpCode::Add));

  // The counter lives CounterOffsetBeyondUsefulData bytes past the start of
  // the dumping ground.
  Constant *OffsetArg = BC.HlslOP->GetU32Const(static_cast<unsigned>(
      UAVDumpingGroundOffset() + CounterOffsetBeyondUsefulData));
  UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));

  Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);

  auto *PreviousValue = BC.Builder.CreateCall(
      AtomicOpFunc,
      {
          AtomicBinOpcode, // i32, ; opcode
          m_OutputUAV,     // %dx.types.Handle, ; resource handle
          AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR,
                     // XOR, IMIN, IMAX, UMIN, UMAX
          OffsetArg, // i32, ; coordinate c0: index in bytes
          UndefArg,  // i32, ; coordinate c1 (unused)
          UndefArg,  // i32, ; coordinate c2 (unused)
          Increment, // i32); increment value
      },
      "UAVIncResult");

  // Keep the write position below the dumping-ground offset.
  return BC.Builder.CreateAnd(PreviousValue, m_OffsetMask, "MaskedForUAVLimit");
}
|
|
|
|
// Emits a BufferStore that writes TheValue (x component only) into the debug
// UAV at byte offset TheOffset, consumes four bytes of the current
// reservation, and returns an add instruction yielding TheOffset + 4.
Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
    BuilderContext &BC, Value *TheOffset, Value *TheValue)
{
  OP *const HlslOP = BC.HlslOP;

  Function *const BufferStoreFn =
      HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
  Constant *const BufferStoreOpcode =
      HlslOP->GetU32Const(static_cast<unsigned>(DXIL::OpCode::BufferStore));
  UndefValue *const Unused32 = UndefValue::get(Type::getInt32Ty(BC.Ctx));
  Constant *const MaskXOnly = HlslOP->GetI8Const(1);

  (void)BC.Builder.CreateCall(BufferStoreFn,
                              {BufferStoreOpcode, // i32 opcode
                               m_OutputUAV, // %dx.types.Handle, resource handle
                               TheOffset,   // i32 c0: index in bytes into UAV
                               Unused32,    // i32 c1: unused
                               TheValue,    // value 0: the dword being stored
                               Unused32,    // value 1: unused
                               Unused32,    // value 2: unused
                               Unused32,    // value 3: unused
                               MaskXOnly}); // write mask: x only

  // Account for the dword just written against the outstanding reservation.
  constexpr int DwordSizeInBytes = sizeof(uint32_t);
  m_RemainingReservedSpaceInBytes -= DwordSizeInBytes;
  // A negative balance means the caller didn't reserve enough space.
  assert(m_RemainingReservedSpaceInBytes >= 0);

  Constant *const Stride =
      HlslOP->GetU32Const(static_cast<unsigned int>(sizeof(uint32_t)));
  return BC.Builder.CreateAdd(TheOffset, Stride);
}
|
|
|
|
template <typename... T>
|
|
void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
|
|
T... values)
|
|
{
|
|
llvm::SmallVector<llvm::Value *, 10> Values(
|
|
{static_cast<llvm::Value *>(values)...});
|
|
const uint32_t DwordCount = Values.size();
|
|
llvm::Value *byteOffset =
|
|
reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t));
|
|
for (llvm::Value *V : Values)
|
|
{
|
|
byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V);
|
|
}
|
|
}
|
|
|
|
// Pass entry point.
//  1. At the first non-alloca insertion point of the entry function, creates
//     the debug UAV handle and the group-id values shared by every record.
//  2. Before each EmitIndices call, writes a record holding the triangle tag,
//     the group ids and the call's operands 1..4.
//  3. Before each StoreVertexOutput call (i32, i16, float and half
//     overloads), writes a record holding the overload's tag, the group ids,
//     operands 1 and 2, the column index widened to 32 bits, the stored value
//     coerced to 32 bits, and operand 5.
// Always returns true (the module gains a UAV even when nothing else is
// instrumented).
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
{
  DxilModule &DM = M.GetOrCreateDxilModule();
  LLVMContext &Ctx = M.getContext();
  OP *HlslOP = DM.GetOP();

  // ---- One-time setup at the top of the entry function --------------------
  Instruction *const EntryInsertionPt =
      dxilutil::FirstNonAllocaInsertionPt(DM.GetEntryFunction());
  IRBuilder<> EntryBuilder(EntryInsertionPt);
  BuilderContext EntryBC{M, DM, Ctx, HlslOP, EntryBuilder};

  // NOTE(review): ANDing with (offset - 1) is only a contiguous wrap-around
  // mask when UAVDumpingGroundOffset() is a power of two - confirm this
  // matches what the PIX side expects.
  m_OffsetMask = EntryBC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1);

  m_OutputUAV = addUAV(EntryBC);

  Value *const GroupIdXandY =
      insertInstructionsToCalculateFlattenedGroupIdXandY(EntryBC);
  Value *const GroupIdZ = insertInstructionsToCalculateGroupIdZ(EntryBC);

  // ---- Triangle indices ---------------------------------------------------
  Function *const EmitIndicesFn =
      HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
  auto EmitIndicesUses = EmitIndicesFn->uses();
  for (auto UseIt = EmitIndicesUses.begin(); UseIt != EmitIndicesUses.end();)
  {
    // Step past the current use before emitting anything for it.
    auto &CurrentUse = *UseIt;
    ++UseIt;

    CallInst *const EmitCall = cast<CallInst>(CurrentUse.getUser());

    // Instrumentation is inserted immediately before the original call.
    IRBuilder<> CallSiteBuilder(EmitCall);
    BuilderContext CallSiteBC{M, DM, Ctx, HlslOP, CallSiteBuilder};

    Instrument(CallSiteBC,
               CallSiteBC.HlslOP->GetI32Const(triangleIndexIndicator),
               GroupIdXandY, GroupIdZ, EmitCall->getOperand(1),
               EmitCall->getOperand(2), EmitCall->getOperand(3),
               EmitCall->getOperand(4));
  }

  // ---- Vertex outputs -----------------------------------------------------
  // One entry per StoreVertexOutput overload, paired with the tag that is
  // written at the head of its records.
  struct OutputType
  {
    Type *type;
    uint32_t tag;
  };
  const OutputType StoreVertexOutputOverloads[] = {
      {Type::getInt32Ty(Ctx), int32ValueIndicator},
      {Type::getInt16Ty(Ctx), int16ValueIndicator},
      {Type::getFloatTy(Ctx), floatValueIndicator},
      {Type::getHalfTy(Ctx), float16ValueIndicator},
  };

  for (const OutputType &Overload : StoreVertexOutputOverloads)
  {
    Function *const StoreFn =
        HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type);
    auto StoreUses = StoreFn->uses();
    for (auto UseIt = StoreUses.begin(); UseIt != StoreUses.end();)
    {
      auto &CurrentUse = *UseIt;
      ++UseIt;

      CallInst *const StoreCall = cast<CallInst>(CurrentUse.getUser());

      IRBuilder<> CallSiteBuilder(StoreCall);
      BuilderContext CallSiteBC{M, DM, Ctx, HlslOP, CallSiteBuilder};

      // Expand column index to 32 bits:
      Value *const ColumnIndex = CallSiteBC.Builder.CreateZExt(
          StoreCall->getOperand(3), Type::getInt32Ty(Ctx));

      // Coerce actual value to int32
      Value *StoredValue = StoreCall->getOperand(4);
      switch (Overload.tag)
      {
      case floatValueIndicator:
        // Same width: reinterpret the bits.
        StoredValue = CallSiteBC.Builder.CreateBitCast(StoredValue,
                                                       Type::getInt32Ty(Ctx));
        break;
      case float16ValueIndicator:
      {
        // Reinterpret as a 16-bit integer, then zero-extend.
        Value *const HalfBits = CallSiteBC.Builder.CreateBitCast(
            StoredValue, Type::getInt16Ty(Ctx));
        StoredValue =
            CallSiteBC.Builder.CreateZExt(HalfBits, Type::getInt32Ty(Ctx));
        break;
      }
      case int16ValueIndicator:
        StoredValue =
            CallSiteBC.Builder.CreateZExt(StoredValue, Type::getInt32Ty(Ctx));
        break;
      default:
        // int32: already the right type.
        break;
      }

      Instrument(CallSiteBC, CallSiteBC.HlslOP->GetI32Const(Overload.tag),
                 GroupIdXandY, GroupIdZ, StoreCall->getOperand(1),
                 StoreCall->getOperand(2), ColumnIndex, StoredValue,
                 StoreCall->getOperand(5));
    }
  }

  // Re-emit the resource metadata now that a UAV has been added.
  DM.ReEmitDxilResources();

  return true;
}
|
|
|
|
char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
|
|
|
|
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation()
|
|
{
|
|
return new DxilPIXMeshShaderOutputInstrumentation();
|
|
}
|
|
|
|
INITIALIZE_PASS(DxilPIXMeshShaderOutputInstrumentation,
|
|
"hlsl-dxil-pix-meshshader-output-instrumentation",
|
|
"DXIL mesh shader output instrumentation for PIX", false, false)
|