User/jeffnn/pix dontoverwriteoffsetcounter (#2729)

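Fix for the overflow case: keep debug records from overwriting the offset counter
Add instrumentation for all StoreVertexOutput overloads (int32, int16, float, half)
Reformat to move opening braces off the end of the line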
This commit is contained in:
Jeff Noyle 2020-03-02 09:23:26 -08:00 коммит произвёл GitHub
Родитель 713c80ce4e
Коммит be3f3fa2ee
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
1 изменённых файлов: 102 добавлений и 51 удалений

Просмотреть файл

@ -31,16 +31,20 @@
// Keep this in sync with the same-named value in the debugger application's
// WinPixShaderUtils.h
// DebugBufferDumpingGroundSize is subtracted from the UAV size to locate the
// "dumping ground" region (see UAVDumpingGroundOffset()).
constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024;
// MaxSizePerRecord is added to the dumping-ground offset when forming the
// atomic-add offset argument in reserveDebugEntrySpace(), and is the bound
// used by that function's size assertion.
constexpr uint64_t MaxSizePerRecord = 64;
// Keep these in sync with the same-named values in PIX's MeshShaderOutput.cpp
// Each indicator is the first value emitted in an instrumentation record.
// The int32/int16/float/float16 indicators are paired with the matching
// StoreVertexOutput overload types in runOnModule().
// NOTE(review): triangleIndexIndicator presumably tags EmitIndices records;
// its use is not visible in this excerpt - confirm.
constexpr uint32_t triangleIndexIndicator = 1;
constexpr uint32_t int32ValueIndicator = 2;
constexpr uint32_t floatValueIndicator = 3;
constexpr uint32_t int16ValueIndicator = 4;
constexpr uint32_t float16ValueIndicator = 5;
using namespace llvm;
using namespace hlsl;
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass {
class DxilPIXMeshShaderOutputInstrumentation : public ModulePass
{
public:
static char ID; // Pass identification, replacement for typeid
explicit DxilPIXMeshShaderOutputInstrumentation() : ModulePass(ID) {}
@ -75,15 +79,18 @@ private:
template <typename... T> void Instrument(BuilderContext &BC, T... values);
};
// Reads the "UAVSize" pass option into m_UAVSize.
// NOTE(review): the trailing 1024 * 1024 argument is presumably the default
// used when the option is absent - confirm against GetPassOptionUInt64.
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O) {
void DxilPIXMeshShaderOutputInstrumentation::applyOptions(PassOptions O)
{
GetPassOptionUInt64(O, "UAVSize", &m_UAVSize, 1024 * 1024);
}
// Returns the byte offset at which the dumping-ground region starts:
// m_UAVSize minus DebugBufferDumpingGroundSize, narrowed to uint32_t.
// NOTE(review): nothing visible here guards against m_UAVSize being smaller
// than DebugBufferDumpingGroundSize (the subtraction would presumably wrap)
// or against the result not fitting in 32 bits - confirm that the caller
// supplying "UAVSize" guarantees both.
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset() {
uint32_t DxilPIXMeshShaderOutputInstrumentation::UAVDumpingGroundOffset()
{
return static_cast<uint32_t>(m_UAVSize - DebugBufferDumpingGroundSize);
}
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC)
{
// Set up a UAV with structure of a single int
unsigned int UAVResourceHandle =
static_cast<unsigned int>(BC.DM.GetUAVs().size());
@ -130,7 +137,8 @@ CallInst *DxilPIXMeshShaderOutputInstrumentation::addUAV(BuilderContext &BC) {
}
Value *DxilPIXMeshShaderOutputInstrumentation::
insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC) {
insertInstructionsToCalculateFlattenedGroupIdXandY(BuilderContext &BC)
{
Constant *Zero32Arg = BC.HlslOP->GetU32Const(0);
Constant *One32Arg = BC.HlslOP->GetU32Const(1);
@ -150,7 +158,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
}
Value *DxilPIXMeshShaderOutputInstrumentation::
insertInstructionsToCalculateGroupIdZ(BuilderContext &BC) {
insertInstructionsToCalculateGroupIdZ(BuilderContext &BC)
{
Constant *Two32Arg = BC.HlslOP->GetU32Const(2);
auto GroupIdFunc =
BC.HlslOP->GetOpFunc(DXIL::OpCode::GroupId, Type::getInt32Ty(BC.Ctx));
@ -160,9 +169,15 @@ Value *DxilPIXMeshShaderOutputInstrumentation::
}
// Opening portion of reserveDebugEntrySpace (the rest of the body is not
// shown in this span). The visible part validates the reservation state and
// then records SpaceInBytes in m_RemainingReservedSpaceInBytes.
Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
BuilderContext &BC, uint32_t SpaceInBytes) {
assert(m_RemainingReservedSpaceInBytes ==
0); // or else the previous caller reserved too much space
BuilderContext &BC, uint32_t SpaceInBytes)
{
// Check the previous caller didn't reserve too much space:
assert(m_RemainingReservedSpaceInBytes == 0);
// Check that the caller didn't ask for so much memory that it will
// overwrite the offset counter:
// NOTE(review): the assertion below tests m_RemainingReservedSpaceInBytes,
// which the preceding assertion has just required to be 0, so it holds
// trivially and never examines the size being requested. Given the comment
// above, SpaceInBytes was presumably the intended operand - confirm, and
// confirm whether the bound should be < or <= MaxSizePerRecord.
assert(m_RemainingReservedSpaceInBytes < MaxSizePerRecord);
m_RemainingReservedSpaceInBytes = SpaceInBytes;
@ -173,7 +188,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
BC.HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
Constant *AtomicAdd =
BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
Constant *OffsetArg = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset());
Constant *OffsetArg =
BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + MaxSizePerRecord);
UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx));
Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes);
@ -196,7 +212,8 @@ Value *DxilPIXMeshShaderOutputInstrumentation::reserveDebugEntrySpace(
}
Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
BuilderContext &BC, Value *TheOffset, Value *TheValue) {
BuilderContext &BC, Value *TheOffset, Value *TheValue)
{
Function *StoreValue =
BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(BC.Ctx));
@ -228,18 +245,21 @@ Value *DxilPIXMeshShaderOutputInstrumentation::writeDwordAndReturnNewOffset(
// Emits one instrumentation record made of the given values.
//
// BC      builder context forwarded to the reserve/write helpers.
// values  parameter pack; each element is static_cast to llvm::Value* and
//         written as one dword, in argument order.
//
// The record size passed to reserveDebugEntrySpace() is the number of values
// times sizeof(uint32_t). The offset it returns is then threaded through
// writeDwordAndReturnNewOffset(), which yields the offset for the next value.
template <typename... T>
void DxilPIXMeshShaderOutputInstrumentation::Instrument(BuilderContext &BC,
T... values) {
T... values)
{
llvm::SmallVector<llvm::Value *, 10> Values(
{static_cast<llvm::Value *>(values)...});
const uint32_t DwordCount = Values.size();
llvm::Value *byteOffset =
reserveDebugEntrySpace(BC, DwordCount * sizeof(uint32_t));
for (llvm::Value *V : Values) {
for (llvm::Value *V : Values)
{
// Each write returns the offset at which the next dword goes.
byteOffset = writeDwordAndReturnNewOffset(BC, byteOffset, V);
}
}
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M)
{
DxilModule &DM = M.GetOrCreateDxilModule();
LLVMContext &Ctx = M.getContext();
OP *HlslOP = DM.GetOP();
@ -259,7 +279,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
auto F = HlslOP->GetOpFunc(DXIL::OpCode::EmitIndices, Type::getVoidTy(Ctx));
auto FunctionUses = F->uses();
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
{
auto &FunctionUse = *FI++;
auto FunctionUser = FunctionUse.getUser();
@ -273,50 +294,79 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
Call->getOperand(2), Call->getOperand(3), Call->getOperand(4));
}
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getInt32Ty(Ctx));
FunctionUses = F->uses();
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
auto &FunctionUse = *FI++;
auto FunctionUser = FunctionUse.getUser();
auto Call = cast<CallInst>(FunctionUser);
IRBuilder<> Builder2(Call);
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
struct OutputType
{
Type *type;
uint32_t tag;
};
SmallVector<OutputType, 4> StoreVertexOutputOverloads
{
{Type::getInt32Ty(Ctx), int32ValueIndicator},
{Type::getInt16Ty(Ctx), int16ValueIndicator},
{Type::getFloatTy(Ctx), floatValueIndicator},
{Type::getHalfTy(Ctx), float16ValueIndicator}
};
for (auto const &Overload : StoreVertexOutputOverloads)
{
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Overload.type);
FunctionUses = F->uses();
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();)
{
auto expandBits = BC2.Builder.CreateCast(
Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
auto &FunctionUse = *FI++;
auto FunctionUser = FunctionUse.getUser();
Instrument(BC2, BC2.HlslOP->GetI32Const(int32ValueIndicator),
GroupIdXandY, GroupIdZ, Call->getOperand(1),
Call->getOperand(2), expandBits, Call->getOperand(4),
Call->getOperand(5));
}
}
auto Call = cast<CallInst>(FunctionUser);
F = HlslOP->GetOpFunc(DXIL::OpCode::StoreVertexOutput, Type::getFloatTy(Ctx));
FunctionUses = F->uses();
for (auto FI = FunctionUses.begin(); FI != FunctionUses.end();) {
auto &FunctionUse = *FI++;
auto FunctionUser = FunctionUse.getUser();
IRBuilder<> Builder2(Call);
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
auto Call = cast<CallInst>(FunctionUser);
// Expand column index to 32 bits:
auto ColumnIndex = BC2.Builder.CreateCast(
Instruction::ZExt,
Call->getOperand(3),
Type::getInt32Ty(Ctx));
IRBuilder<> Builder2(Call);
BuilderContext BC2{M, DM, Ctx, HlslOP, Builder2};
// Coerce actual value to int32
Value *CoercedValue = Call->getOperand(4);
{
auto expandBits = BC2.Builder.CreateCast(
Instruction::ZExt, Call->getOperand(3), Type::getInt32Ty(Ctx));
if (Overload.tag == floatValueIndicator)
{
CoercedValue = BC2.Builder.CreateCast(
Instruction::BitCast,
CoercedValue,
Type::getInt32Ty(Ctx));
}
else if (Overload.tag == float16ValueIndicator)
{
auto * HalfInt = BC2.Builder.CreateCast(
Instruction::BitCast,
CoercedValue,
Type::getInt16Ty(Ctx));
auto reinterpretFloatToInt = BC2.Builder.CreateCast(
Instruction::BitCast, Call->getOperand(4), Type::getInt32Ty(Ctx));
CoercedValue = BC2.Builder.CreateCast(
Instruction::ZExt,
HalfInt,
Type::getInt32Ty(Ctx));
}
else if (Overload.tag == int16ValueIndicator)
{
CoercedValue = BC2.Builder.CreateCast(
Instruction::ZExt,
CoercedValue,
Type::getInt32Ty(Ctx));
}
Instrument(BC2, BC2.HlslOP->GetI32Const(floatValueIndicator),
GroupIdXandY, GroupIdZ, Call->getOperand(1),
Call->getOperand(2), expandBits, reinterpretFloatToInt,
Call->getOperand(5));
Instrument(
BC2,
BC2.HlslOP->GetI32Const(Overload.tag),
GroupIdXandY,
GroupIdZ,
Call->getOperand(1),
Call->getOperand(2),
ColumnIndex,
CoercedValue,
Call->getOperand(5));
}
}
@ -327,7 +377,8 @@ bool DxilPIXMeshShaderOutputInstrumentation::runOnModule(Module &M) {
// Definition of the static pass-identification member declared in the class.
char DxilPIXMeshShaderOutputInstrumentation::ID = 0;
// Factory for the pass: allocates a DxilPIXMeshShaderOutputInstrumentation
// with new and returns it as a ModulePass*.
// NOTE(review): no matching delete is visible here; presumably the caller
// (e.g. a pass manager) takes ownership - confirm.
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation() {
ModulePass *llvm::createDxilDxilPIXMeshShaderOutputInstrumentation()
{
return new DxilPIXMeshShaderOutputInstrumentation();
}