User/jeffnn/pix dontoverwriteoffsetcounter (#2729)
Fix for overflow case Overloads for StoreVertexOutput Reformat to remove curly-on-end
This commit is contained in:
Родитель
713c80ce4e
Коммит
be3f3fa2ee
|
@ -31,16 +31,20 @@
|
|||
// Keep this in sync with the same-named value in the debugger application's
|
||||
// WinPixShaderUtils.h
|
||||
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
|
||||
constexpr uint64_t MaxSizePerRecord = 64;
|
||||
|
||||
// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
|
||||
constexpr uint32_t triangleIndexIndicator = 1;
|
||||
constexpr uint32_t int32ValueIndicator = 2;
|
||||
constexpr uint32_t floatValueIndicator = 3;
|
||||
constexpr uint32_t int16ValueIndicator = 4;
|
||||
constexpr uint32_t float16ValueIndicator = 5;
|
||||
|
||||
using namespace llvm;
|
||||
using namespace hlsl;
|
||||
|
||||
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
|
||||
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass
|
||||
{
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
|
||||
|
@ -75,15 +79,18 @@ private:
|
|||
template <typename... T> void Instrument(BuilderContext &BC, T... values);
|
||||
};
|
||||
|
||||
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
|
||||
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O)
|
||||
{
|
||||
GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
|
||||
}
|
||||
|
||||
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
|
||||
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
|
||||
{
|
||||
return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
|
||||
}
|
||||
|
||||
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
|
||||
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC)
|
||||
{
|
||||
// Set up a UAV with structure of a single int
|
||||
unsigned int UAVResourceHandle =
|
||||
static_cast<unsigned int>(BC.DM.GetUAVs().size());
|
||||
|
@ -130,7 +137,8 @@ CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
|
|||
}
|
||||
|
||||
Value *DxilPIXMeshShaderOutputInstrumentation::
|
||||
insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
|
||||
insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
|
||||
{
|
||||
Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
|
||||
Constant *One32Arg = BC.HlslOP->GetU32Const(1);
|
||||
|
||||
|
@ -150,7 +158,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
|
|||
}
|
||||
|
||||
Value *DxilPIXMeshShaderOutputInstrumentation::
|
||||
insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
|
||||
insertInstructionsToCalculateGroupIdZ(BuilderContext &BC)
|
||||
{
|
||||
Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
|
||||
auto GroupIdFunc =
|
||||
BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
|
||||
|
@ -160,9 +169,15 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
|
|||
}
|
||||
|
||||
Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
||||
BuilderContext &BC, uint32_t SpaceInBytes) {
|
||||
assert(m_RemainingReservedSpaceInBytes ==
|
||||
0); // or else the previous caller reserved too much space
|
||||
BuilderContext &BC, uint32_t SpaceInBytes)
|
||||
{
|
||||
|
||||
// Check the previous caller didn't reserve too much space:
|
||||
assert(m_RemainingReservedSpaceInBytes == 0);
|
||||
|
||||
// Check that the caller didn't ask for so much memory that it will
|
||||
// overwrite the offset counter:
|
||||
assert(m_RemainingReservedSpaceInBytes < MaxSizePerRecord);
|
||||
|
||||
m_RemainingReservedSpaceInBytes = SpaceInBytes;
|
||||
|
||||
|
@ -173,7 +188,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
|||
BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
|
||||
Constant *AtomicAdd =
|
||||
BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
|
||||
Constant *OffsetArg = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset());
|
||||
Constant *OffsetArg =
|
||||
BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + MaxSizePerRecord);
|
||||
UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
|
||||
|
||||
Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
|
||||
|
@ -196,7 +212,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
|
|||
}
|
||||
|
||||
Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
|
||||
BuilderContext &BC, Value *TheOffset, Value *TheValue) {
|
||||
BuilderContext &BC, Value *TheOffset, Value *TheValue)
|
||||
{
|
||||
|
||||
Function *StoreValue =
|
||||
BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
|
||||
|
@ -228,18 +245,21 @@ Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
|
|||
|
||||
template <typename... T>
|
||||
void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
|
||||
T... values) {
|
||||
T... values)
|
||||
{
|
||||
llvm::SmallVector<llvm::Value *, 10> Values(
|
||||
{static_cast<llvm::Value *>(values)...});
|
||||
const uint32_t DwordCount = Values.size();
|
||||
llvm::Value *byteOffset =
|
||||
reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t));
|
||||
for (llvm::Value *V : Values) {
|
||||
for (llvm::Value *V : Values)
|
||||
{
|
||||
byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V);
|
||||
}
|
||||
}
|
||||
|
||||
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
||||
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
|
||||
{
|
||||
DxilModule &DM = M.GetOrCreateDxilModule();
|
||||
LLVMContext &Ctx = M.getContext();
|
||||
OP *HlslOP = DM.GetOP();
|
||||
|
@ -259,7 +279,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|||
|
||||
auto F = HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
|
||||
auto FunctionUses = F->uses();
|
||||
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
||||
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
|
||||
{
|
||||
auto &FunctionUse = *FI++;
|
||||
auto FunctionUser = FunctionUse.getUser();
|
||||
|
||||
|
@ -273,50 +294,79 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|||
Call->getOperand(2), Call->getOperand(3), Call->getOperand(4));
|
||||
}
|
||||
|
||||
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getInt32Ty(Ctx));
|
||||
FunctionUses = F->uses();
|
||||
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
||||
auto &FunctionUse = *FI++;
|
||||
auto FunctionUser = FunctionUse.getUser();
|
||||
|
||||
auto Call = cast<CallInst>(FunctionUser);
|
||||
|
||||
IRBuilder<> Builder2(Call);
|
||||
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
||||
struct OutputType
|
||||
{
|
||||
Type *type;
|
||||
uint32_t tag;
|
||||
};
|
||||
SmallVector<OutputType, 4> StoreVertexOutputOverloads
|
||||
{
|
||||
{Type::getInt32Ty(Ctx), int32ValueIndicator},
|
||||
{Type::getInt16Ty(Ctx), int16ValueIndicator},
|
||||
{Type::getFloatTy(Ctx), floatValueIndicator},
|
||||
{Type::getHalfTy(Ctx), float16ValueIndicator}
|
||||
};
|
||||
|
||||
for (auto const &Overload : StoreVertexOutputOverloads)
|
||||
{
|
||||
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type);
|
||||
FunctionUses = F->uses();
|
||||
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
|
||||
{
|
||||
auto expandBits = BC2.Builder.CreateCast(
|
||||
Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
|
||||
auto &FunctionUse = *FI++;
|
||||
auto FunctionUser = FunctionUse.getUser();
|
||||
|
||||
Instrument(BC2, BC2.HlslOP->GetI32Const(int32ValueIndicator),
|
||||
GroupIdXandY, GroupIdZ, Call->getOperand(1),
|
||||
Call->getOperand(2), expandBits, Call->getOperand(4),
|
||||
Call->getOperand(5));
|
||||
}
|
||||
}
|
||||
auto Call = cast<CallInst>(FunctionUser);
|
||||
|
||||
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getFloatTy(Ctx));
|
||||
FunctionUses = F->uses();
|
||||
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
|
||||
auto &FunctionUse = *FI++;
|
||||
auto FunctionUser = FunctionUse.getUser();
|
||||
IRBuilder<> Builder2(Call);
|
||||
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
||||
|
||||
auto Call = cast<CallInst>(FunctionUser);
|
||||
// Expand column index to 32 bits:
|
||||
auto ColumnIndex = BC2.Builder.CreateCast(
|
||||
Instruction::ZExt,
|
||||
Call->getOperand(3),
|
||||
Type::getInt32Ty(Ctx));
|
||||
|
||||
IRBuilder<> Builder2(Call);
|
||||
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
|
||||
// Coerce actual value to int32
|
||||
Value *CoercedValue = Call->getOperand(4);
|
||||
|
||||
{
|
||||
auto expandBits = BC2.Builder.CreateCast(
|
||||
Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
|
||||
if (Overload.tag == floatValueIndicator)
|
||||
{
|
||||
CoercedValue = BC2.Builder.CreateCast(
|
||||
Instruction::BitCast,
|
||||
CoercedValue,
|
||||
Type::getInt32Ty(Ctx));
|
||||
}
|
||||
else if (Overload.tag == float16ValueIndicator)
|
||||
{
|
||||
auto * HalfInt = BC2.Builder.CreateCast(
|
||||
Instruction::BitCast,
|
||||
CoercedValue,
|
||||
Type::getInt16Ty(Ctx));
|
||||
|
||||
auto reinterpretFloatToInt = BC2.Builder.CreateCast(
|
||||
Instruction::BitCast, Call->getOperand(4), Type::getInt32Ty(Ctx));
|
||||
CoercedValue = BC2.Builder.CreateCast(
|
||||
Instruction::ZExt,
|
||||
HalfInt,
|
||||
Type::getInt32Ty(Ctx));
|
||||
}
|
||||
else if (Overload.tag == int16ValueIndicator)
|
||||
{
|
||||
CoercedValue = BC2.Builder.CreateCast(
|
||||
Instruction::ZExt,
|
||||
CoercedValue,
|
||||
Type::getInt32Ty(Ctx));
|
||||
}
|
||||
|
||||
Instrument(BC2, BC2.HlslOP->GetI32Const(floatValueIndicator),
|
||||
GroupIdXandY, GroupIdZ, Call->getOperand(1),
|
||||
Call->getOperand(2), expandBits, reinterpretFloatToInt,
|
||||
Call->getOperand(5));
|
||||
Instrument(
|
||||
BC2,
|
||||
BC2.HlslOP->GetI32Const(Overload.tag),
|
||||
GroupIdXandY,
|
||||
GroupIdZ,
|
||||
Call->getOperand(1),
|
||||
Call->getOperand(2),
|
||||
ColumnIndex,
|
||||
CoercedValue,
|
||||
Call->getOperand(5));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,7 +377,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
|
|||
|
||||
char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
|
||||
|
||||
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
|
||||
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation()
|
||||
{
|
||||
return new DxilPIXMeshShaderOutputInstrumentation();
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче