diff --git a/include/dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h b/include/dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h index 3af17c42e..a06cfe965 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h +++ b/include/dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h @@ -28,7 +28,7 @@ static constexpr uint32_t ID = 3; void AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI, std::uint32_t InstNum); -bool FromInst(llvm::Instruction *pI, std::uint32_t *pInstNum); +bool FromInst(llvm::Instruction const *pI, std::uint32_t *pInstNum); } // namespace PixDxilInstNum namespace PixDxilReg { @@ -36,7 +36,7 @@ static constexpr char MDName[] = "pix-dxil-reg"; static constexpr uint32_t ID = 0; void AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI, std::uint32_t RegNum); -bool FromInst(llvm::Instruction *pI, std::uint32_t *pRegNum); +bool FromInst(llvm::Instruction const *pI, std::uint32_t *pRegNum); } // namespace PixDxilReg namespace PixAllocaReg { @@ -45,7 +45,7 @@ static constexpr uint32_t ID = 1; void AddMD(llvm::LLVMContext &Ctx, llvm::AllocaInst *pAlloca, std::uint32_t RegNum, std::uint32_t Count); -bool FromInst(llvm::AllocaInst *pAlloca, std::uint32_t *pRegBase, +bool FromInst(llvm::AllocaInst const *pAlloca, std::uint32_t *pRegBase, std::uint32_t *pRegSize); } // namespace PixAllocaReg diff --git a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp index ea93d9968..c85812f34 100644 --- a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp +++ b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp @@ -989,15 +989,6 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, } } -class ScopedInstruction { - llvm::Instruction *m_Instruction; - -public: - ScopedInstruction(llvm::Instruction *I) : m_Instruction(I) {} - ~ScopedInstruction() { delete m_Instruction; } - llvm::Instruction *Get() const { return m_Instruction; } -}; - struct GlobalVariableAndStorage { llvm::DIGlobalVariable *DIGV; OffsetInBits Offset; diff --git 
a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp index 1337cdfb3..f705d9192 100644 --- a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp +++ b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp @@ -9,6 +9,7 @@ // // /////////////////////////////////////////////////////////////////////////////// +#include #include #include "dxc/DXIL/DxilFunctionProps.h" @@ -34,69 +35,103 @@ using namespace hlsl; // // In summary, instructions are added that cause a "trace" of the execution of // the shader to be written out to a UAV. This trace is then used by a debugger -// application to provide a post-mortem debugging experience that reconstructs -// the execution history of the shader. +// application to provide a postmortem debugging experience that reconstructs +// the execution history of the shader. The caller specifies the power-of-two +// size of the UAV. // -// The trace is only required for a particular shader instance of interest, and +// The instrumentation is added per basic block, and each block will then write +// a contiguous sequence of values into the UAV. +// +// The trace is only required for particular shader instances of interest, and // a branchless mechanism is used to write the trace either to an incrementing -// location within the UAV, or to a "dumping ground" area at the top of the UAV -// if the instance is not of interest. +// location within the UAV, or to a "dumping ground" area in the top half of the +// UAV if the instance is not of interest. // -// The following modifications are made: +// In addition, each half of the UAV is further subdivided: the first quarter is +// the area in which blocks are permitted to start writing their sequence, and +// that sequence is constrained to be no longer than the size of the second +// quarter. This allows us to limit writes to the appropriate half of the UAV +// via a single AND at the beginning of the basic block. 
An additional OR +// provides the offset, either 0 for threads-of-interest, or UAVSize/2 for +// not-of-interest. +// +// Threads determine where to start writing their data by incrementing a DWORD +// that lives at the very top of that thread's half of the UAV. This is done +// because several threads may satisfy the selection criteria (e.g. a pixel +// shader may be invoked several times for a given pixel coordinate if the model +// has overlapping triangles). +// +// A picture of the UAV layout: +// <--------------power-of-two-size-of-UAV----------------> +// [1 ][2 ][3 ][4 ] +// <------A-----> ^ ^ +// B C +// <------D------> +// +// A: the size of the AND for interesting writes. Their payloads extend +// beyond this into area 2, but those payloads are limited to be small +// enough (1/4 UAV size -1) that they don't overwrite B. +// B: The interesting thread's counter. +// C: The uninteresting thread's counter. +// D: Size of the AND for uninteresting threads (same value as A) +// +// The following modifications are made by this pass: // // First, instructions are added to the top of the entry point function that // implement the following: // - Examine the input variables that define the instance of the shader that is -// running. This will -// be SV_Position for pixel shaders, SV_Vertex+SV_Instance for vertex -// shaders, thread id for compute shaders etc. If these system values need to -// be added to the shader, then they are also added to the input signature, -// if appropriate. +// running. This will be SV_Position for pixel shaders, SV_Vertex+SV_Instance +// for vertex shaders, thread id for compute shaders etc. If these system +// values need to be added to the shader, then they are also added to the +// input signature, if appropriate. // - Compare the above variables with the instance of interest defined by the -// invoker of this pass. 
-// Deduce two values: a multiplicand and an addend that together allow a -// branchless calculation of the offset into the UAV at which to write via -// "offset = offset * multiplicand + addend." If the instance is NOT of -// interest, the multiplicand is zero and the addend is sizeof(UAV)-(a little -// bit), causing writes for uninteresting invocations to end up at the top of -// the UAV. Otherwise the multiplicand is 1 and the addend is 0. +// invoker of this pass. If equal, create an OR value of zero that will +// not affect the block's starting write offset. If not equal, the OR will +// move the writes into the second half of the UAV. // - Calculate an "instance identifier". Even with the above instance -// identification, several invocations may -// end up matching the selection criteria. Specifically, this happens during -// a draw call in which many triangles overlap the pixel of interest. More on -// this below. +// identification, several invocations may end up matching the selection +// criteria. More on this below. // -// During execution, the instrumentation for most instructions cause data to be -// emitted to the UAV. The index at which data is written is identified by -// treating the first uint32 of the UAV as an index which is atomically -// incremented by the instrumentation. The very first value of this counter that +// As mentioned, a counter/offset is maintained at the top of the thread's +// half of the UAV. The very first value of this counter that // is encountered by each invocation is used as the "instance identifier" // mentioned above. 
That instance identifier is written out with each packet, -// since many pixel shaders executing in parallel will emit interleaved packets, -// and the debugger application uses the identifiers to group packets from each +// since many threads executing in parallel will emit interleaved packets, +// and the debugger application uses the identifiers to gather packets from each // separate invocation together. // -// If an instruction has a non-void and primitive return type, i.e. isn't a -// struct, then the instrumentation will write that value out to the UAV as well -// as part of the "step" data packet. +// In addition to the above, this pass creates a text precis of the structure +// being written out for each basic block. This precis is passed back to the +// caller, and can be used to parse the UAV output later. The precis will +// contain notes about void-type instructions, which won't write anything to the +// UAV, allowing the caller to reconstruct those instructions. +// Some care has to be taken about whether to emit UAV writes after the +// corresponding instruction or before. Terminators must emit their UAV data +// before the terminator itself, of course. Phi instructions get special +// treatment also: their instrumentation has to come after (since phis must be +// the first instructions in the block), but also the instrumentation must +// execute in the same order as the precis specifies, or the caller will mix +// up the phi values. We achieve this by saying that phi instrumentation must +// come before the first non-phi instruction in the block. +// Some blocks will have all-void instructions, so that no debugging +// data is emitted at all. These blocks still produce a precis, and still +// need to be noticed during execution, so an empty block header is emitted +// into the UAV. // -// The limiting size of the UAV is enforced in a branchless way by ANDing the -// offset with a precomputed value that is sizeof(UAV)-64. 
The actual size of -// the UAV allocated by the caller is required to be a power of two plus 64 for -// this reason. The caller detects UAV overrun by examining a canary value close -// to the end of the power-of-two size of the UAV. If this value has been -// overwritten, the debug session is deemed to have overflowed the UAV. The -// caller will than allocate a UAV that is twice the size and try again, up to a -// predefined maximum. - -// Keep these in sync with the same-named value in the debugger application's -// WinPixShaderUtils.h - -constexpr uint64_t DebugBufferDumpingGroundSize = 64 * 1024; -// The actual max size per record is much smaller than this, but it never -// hurts to be generous. -constexpr size_t CounterOffsetBeyondUsefulData = - DebugBufferDumpingGroundSize / 2; +// Error conditions: +// Overflow of the debug output from the interesting threads will start to +// overwrite their own area of the UAV (after the AND limits those writes +// to the lower half of the UAV (thus, by the way, avoiding overwriting +// their counter value)). The caller must check the counter value after +// the debugging run is complete to see if this happened, and if so, increase +// the UAV size and try again. +// Uninteresting threads use an AND value that limits their writes to the +// upper half of the UAV and can be entirely ignored by the caller. +// Since a sufficiently-large block is guaranteed to overflow the UAV, +// the precis-creation can exit early and report this "static" overflow +// condition to the caller. +// In all overflow cases, the caller is expected to try to instrument again, +// with a larger UAV. 
// These definitions echo those in the debugger application's // debugshaderrecord.h file @@ -110,7 +145,12 @@ enum DebugShaderModifierRecordType { DebugShaderModifierRecordTypeRegisterRelativeIndex0, DebugShaderModifierRecordTypeRegisterRelativeIndex1, DebugShaderModifierRecordTypeRegisterRelativeIndex2, - DebugShaderModifierRecordTypeDXILStepTerminator = 250, + // Note that everything above this line is no longer used, but is kept + // here in order to keep this file more in-sync with the debugger source. + // (As of this writing, the debugger still supports older versions of this + // pass which produced finer-grained debug packets.) + DebugShaderModifierRecordTypeDXILStepBlock = 249, + DebugShaderModifierRecordTypeDXILStepRet = 250, DebugShaderModifierRecordTypeDXILStepVoid = 251, DebugShaderModifierRecordTypeDXILStepFloat = 252, DebugShaderModifierRecordTypeDXILStepUint32 = 253, @@ -150,6 +190,20 @@ struct DebugShaderModifierRecordDXILStepBase { uint32_t InstructionOffset; }; +struct DebugShaderModifierRecordDXILBlock { + union { + struct { + uint32_t NotUsed0 : 4; + uint32_t NotUsed1 : 4; + uint32_t Type : 8; + uint32_t CountOfInstructions : 16; + } Details; + uint32_t u32Header; + } Header; + uint32_t UID; + uint32_t FirstInstructionOrdinal; +}; + template struct DebugShaderModifierRecordDXILStep : public DebugShaderModifierRecordDXILStepBase { @@ -174,6 +228,16 @@ DebugShaderModifierRecordPayloadSizeDwords(size_t recordTotalSizeBytes) { sizeof(uint32_t)); } +struct InstructionAndType { + Instruction *Inst; + std::uint32_t InstructionOrdinal; + DebugShaderModifierRecordType Type; + std::uint32_t RegisterNumber; + std::uint32_t AllocaBase; + Value *AllocaWriteIndex = nullptr; + std::optional ConstantAllocaStoreValue; +}; + class DxilDebugInstrumentation : public ModulePass { private: @@ -220,23 +284,19 @@ private: uint64_t m_UAVSize = 1024 * 1024; struct PerFunctionValues { CallInst *UAVHandle = nullptr; - Constant *CounterOffset = nullptr; + Instruction 
*CounterOffset = nullptr; Value *InvocationId = nullptr; // Together these two values allow branchless writing to the UAV. An // invocation of the shader is either of interest or not (e.g. it writes to // the pixel the user selected for debugging or it doesn't). If not of // interest, debugging output will still occur, but it will be relegated to - // the very top few bytes of the UAV. Invocations of interest, by contrast, + // the top half of the UAV. Invocations of interest, by contrast, // will be written to the UAV at sequentially increasing offsets. - // This value will either be one or zero (one if the invocation is of - // interest, zero otherwise) - Value *OffsetMultiplicand = nullptr; - // This will either be zero (if the invocation is of interest) or - // (UAVSize)-(SmallValue) if not. - Value *OffsetAddend = nullptr; - Constant *OffsetMask = nullptr; + Value *OffsetMask = nullptr; + Instruction *OffsetOr = nullptr; Value *SelectionCriterion = nullptr; Value *CurrentIndex = nullptr; + std::vector AddedBlocksToIgnoreForInstrumentation; }; std::map m_FunctionToValues; @@ -275,20 +335,37 @@ private: SystemValueIndices SVIndices); Value *addHullhaderProlog(BuilderContext &BC); Value *addComparePrimitiveIdProlog(BuilderContext &BC, unsigned SVIndices); - void addDebugEntryValue(BuilderContext &BC, Value *TheValue); + uint32_t addDebugEntryValue(BuilderContext &BC, Value *TheValue); void addInvocationStartMarker(BuilderContext &BC); + void determineLimitANDAndInitializeCounter(BuilderContext &BC); void reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInDwords); - void addStoreStepDebugEntry(BuilderContext &BC, StoreInst *Inst); - void addStepDebugEntry(BuilderContext &BC, Instruction *Inst); - void addStepDebugEntryValue(BuilderContext &BC, std::uint32_t InstNum, - Value *V, std::uint32_t ValueOrdinal, - Value *ValueOrdinalIndex); + std::optional addStoreStepDebugEntry(BuilderContext *BC, + StoreInst *Inst); + std::optional 
addStepDebugEntry(BuilderContext *BC, + Instruction *Inst); + std::optional + addStepDebugEntryValue(BuilderContext *BC, std::uint32_t InstNum, Value *V, + std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex); uint32_t UAVDumpingGroundOffset(); template void addStepEntryForType(DebugShaderModifierRecordType RecordType, BuilderContext &BC, std::uint32_t InstNum, Value *V, std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex); + struct InstructionToInstrument { + Value *ValueToWriteToDebugMemory; + DebugShaderModifierRecordType ValueType; + Instruction *InstructionAfterWhichToAddInstrumentation; + Instruction *InstructionBeforeWhichToAddInstrumentation; + }; + struct BlockInstrumentationData { + uint32_t FirstInstructionOrdinalInBlock; + std::vector Instructions; + }; + BlockInstrumentationData FindInstrumentableInstructionsInBlock(BasicBlock &BB, + OP *HlslOP); + uint32_t + CountBlockPayloadBytes(std::vector const &IsAndTs); }; void DxilDebugInstrumentation::applyOptions(PassOptions O) { @@ -302,7 +379,7 @@ void DxilDebugInstrumentation::applyOptions(PassOptions O) { } uint32_t DxilDebugInstrumentation::UAVDumpingGroundOffset() { - return static_cast(m_UAVSize - DebugBufferDumpingGroundSize); + return static_cast(m_UAVSize / 2); } static unsigned FindOrAddInputSignatureElement( @@ -653,23 +730,87 @@ void DxilDebugInstrumentation::addInvocationSelectionProlog( assert(false); // guaranteed by runOnModule } - // This is a convenient place to calculate the values that modify the UAV - // offset for invocations of interest and for UAV size. 
auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; - values.OffsetMultiplicand = - BC.Builder.CreateCast(Instruction::CastOps::ZExt, ParameterTestResult, - Type::getInt32Ty(BC.Ctx), "OffsetMultiplicand"); - auto InverseOffsetMultiplicand = - BC.Builder.CreateSub(BC.HlslOP->GetU32Const(1), values.OffsetMultiplicand, - "ComplementOfMultiplicand"); - values.OffsetAddend = - BC.Builder.CreateMul(BC.HlslOP->GetU32Const(UAVDumpingGroundOffset()), - InverseOffsetMultiplicand, "OffsetAddend"); - values.OffsetMask = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() - 1); - values.SelectionCriterion = ParameterTestResult; } +void DxilDebugInstrumentation::determineLimitANDAndInitializeCounter( + BuilderContext &BC) { + + auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; + + // Split the block at the current insertion point. Insert a conditional + // branch that will invoke one of two new blocks depending on if this + // is a thread-of-interest. The two different classes of thread will + // then be given different limiting AND values within these new + // blocks. 
+ + BasicBlock *RestOfMainBlock = BC.Builder.GetInsertBlock()->splitBasicBlock( + *BC.Builder.GetInsertPoint()); + + // Up to this split point is a new block that we don't need to instrument: + values.AddedBlocksToIgnoreForInstrumentation.push_back( + BC.Builder.GetInsertBlock()); + + auto *InterestingInvocationBlock = BasicBlock::Create( + BC.Ctx, "PIXInterestingBlock", BC.Builder.GetInsertBlock()->getParent(), + RestOfMainBlock); + values.AddedBlocksToIgnoreForInstrumentation.push_back( + InterestingInvocationBlock); + IRBuilder<> BuilderForInteresting(InterestingInvocationBlock); + BuilderForInteresting.CreateBr(RestOfMainBlock); + + auto *NonInterestingInvocationBlock = BasicBlock::Create( + BC.Ctx, "PIXNonInterestingBlock", + BC.Builder.GetInsertBlock()->getParent(), RestOfMainBlock); + values.AddedBlocksToIgnoreForInstrumentation.push_back( + NonInterestingInvocationBlock); + + IRBuilder<> BuilderForNonInteresting(NonInterestingInvocationBlock); + BuilderForNonInteresting.CreateBr(RestOfMainBlock); + + // Connect these new blocks as necessary: + BC.Builder.SetInsertPoint(BC.Builder.GetInsertBlock()->getTerminator()); + BC.Builder.CreateCondBr(values.SelectionCriterion, InterestingInvocationBlock, + NonInterestingInvocationBlock); + BC.Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); + + values.OffsetMask = BC.HlslOP->GetU32Const(m_UAVSize / 4 - 1); + + // Now add a phi that selects between two constant OR values based on + // which branch the thread followed above (interesting or not). + // The OR will either place the output in the lower half or the upper + // half of the UAV. 
+ BC.Builder.SetInsertPoint(RestOfMainBlock->getFirstInsertionPt()); + auto *PHIForOr = + BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXOffsetOr"); + PHIForOr->addIncoming(BC.HlslOP->GetU32Const(0), InterestingInvocationBlock); + PHIForOr->addIncoming(BC.HlslOP->GetU32Const(m_UAVSize / 2), + NonInterestingInvocationBlock); + values.OffsetOr = PHIForOr; + + auto *PHIForCounterOffset = + BC.Builder.CreatePHI(Type::getInt32Ty(BC.Ctx), 2, "PIXCounterLocation"); + const uint32_t InterestingCounterOffset = + static_cast(m_UAVSize / 2 - 1); + PHIForCounterOffset->addIncoming( + BC.HlslOP->GetU32Const(InterestingCounterOffset), + InterestingInvocationBlock); + const uint32_t UninterestingCounterOffsetValue = + static_cast(m_UAVSize - 1); + PHIForCounterOffset->addIncoming( + BC.HlslOP->GetU32Const(UninterestingCounterOffsetValue), + NonInterestingInvocationBlock); + values.CounterOffset = PHIForCounterOffset; + + // These are reported to the caller so there are fewer assumptions made by the + // caller about these internal details: + *OSOverride << "InterestingCounterOffset:" + << std::to_string(InterestingCounterOffset) << "\n"; + *OSOverride << "OverflowThreshold:" << std::to_string(m_UAVSize / 4 - 1) + << "\n"; +} + void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, uint32_t SpaceInBytes) { auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; @@ -687,11 +828,7 @@ void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, BC.HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add); UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(BC.Ctx)); - // so inc will be zero for uninteresting invocations: Constant *Increment = BC.HlslOP->GetU32Const(SpaceInBytes); - Value *IncrementForThisInvocation = BC.Builder.CreateMul( - Increment, values.OffsetMultiplicand, "IncrementForThisInvocation"); - auto PreviousValue = BC.Builder.CreateCall( AtomicOpFunc, { @@ -699,10 +836,10 @@ void 
DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, values.UAVHandle, // %dx.types.Handle, ; resource handle AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, // XOR, IMIN, IMAX, UMIN, UMAX - values.CounterOffset, // i32, ; coordinate c0: index in bytes - UndefArg, // i32, ; coordinate c1 (unused) - UndefArg, // i32, ; coordinate c2 (unused) - IncrementForThisInvocation, // i32); increment value + values.CounterOffset, // i32, ; coordinate c0: index in bytes + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + Increment, // i32); increment value }, "UAVIncResult"); @@ -710,22 +847,18 @@ void DxilDebugInstrumentation::reserveDebugEntrySpace(BuilderContext &BC, values.InvocationId = PreviousValue; } - auto MaskedForLimit = BC.Builder.CreateAnd(PreviousValue, values.OffsetMask, - "MaskedForUAVLimit"); - // The return value will either end up being itself (multiplied by one and - // added with zero) or the "dump uninteresting things here" value of (UAVSize - // - a bit). 
- auto MultipliedForInterest = BC.Builder.CreateMul( - MaskedForLimit, values.OffsetMultiplicand, "MultipliedForInterest"); - auto AddedForInterest = BC.Builder.CreateAdd( - MultipliedForInterest, values.OffsetAddend, "AddedForInterest"); - values.CurrentIndex = AddedForInterest; + auto *Masked = BC.Builder.CreateAnd(PreviousValue, values.OffsetMask, + "MaskedForUAVLimit"); + values.CurrentIndex = + BC.Builder.CreateOr(Masked, values.OffsetOr, "ORedForUAVStart"); } -void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, - Value *TheValue) { +uint32_t DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, + Value *TheValue) { assert(m_RemainingReservedSpaceInBytes > 0); + uint32_t BytesToBeEmitted = 0; + auto TheValueTypeID = TheValue->getType()->getTypeID(); if (TheValueTypeID == Type::TypeID::DoubleTyID) { Function *SplitDouble = @@ -741,6 +874,7 @@ void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding addDebugEntryValue(BC, LowBits); addDebugEntryValue(BC, HighBits); + BytesToBeEmitted += 8; } else if (TheValueTypeID == Type::TypeID::IntegerTyID && TheValue->getType()->getIntegerBitWidth() == 64) { auto LowBits = @@ -751,16 +885,17 @@ void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, // addDebugEntryValue(BC, BC.HlslOP->GetU32Const(0)); // padding addDebugEntryValue(BC, LowBits); addDebugEntryValue(BC, HighBits); + BytesToBeEmitted += 8; } else if (TheValueTypeID == Type::TypeID::IntegerTyID && (TheValue->getType()->getIntegerBitWidth() == 16 || TheValue->getType()->getIntegerBitWidth() == 1)) { auto As32 = BC.Builder.CreateZExt(TheValue, Type::getInt32Ty(BC.Ctx), "As32"); - addDebugEntryValue(BC, As32); + BytesToBeEmitted += addDebugEntryValue(BC, As32); } else if (TheValueTypeID == Type::TypeID::HalfTyID) { auto AsFloat = BC.Builder.CreateFPCast(TheValue, Type::getFloatTy(BC.Ctx), "AsFloat"); - addDebugEntryValue(BC, AsFloat); + 
BytesToBeEmitted += addDebugEntryValue(BC, AsFloat); } else { Function *StoreValue = BC.HlslOP->GetOpFunc(OP::OpCode::BufferStore, @@ -777,6 +912,7 @@ void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, // The above are the only two valid types for a UAV store assert(false); } + BytesToBeEmitted += 4; Constant *WriteMask_X = BC.HlslOP->GetI8Const(1); auto &values = m_FunctionToValues[BC.Builder.GetInsertBlock()->getParent()]; @@ -792,8 +928,8 @@ void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, UndefArg, // unused values WriteMask_X}); + assert(m_RemainingReservedSpaceInBytes >= 4); // check for underflow m_RemainingReservedSpaceInBytes -= 4; - assert(m_RemainingReservedSpaceInBytes < 1024); // check for underflow if (m_RemainingReservedSpaceInBytes != 0) { values.CurrentIndex = @@ -802,6 +938,8 @@ void DxilDebugInstrumentation::addDebugEntryValue(BuilderContext &BC, values.CurrentIndex = nullptr; } } + + return BytesToBeEmitted; } void DxilDebugInstrumentation::addInvocationStartMarker(BuilderContext &BC) { @@ -834,11 +972,9 @@ void DxilDebugInstrumentation::addStepEntryForType( addDebugEntryValue(BC, BC.HlslOP->GetU32Const(step.Header.u32Header)); addDebugEntryValue(BC, values.InvocationId); addDebugEntryValue(BC, BC.HlslOP->GetU32Const(InstNum)); - if (RecordType != DebugShaderModifierRecordTypeDXILStepVoid && - RecordType != DebugShaderModifierRecordTypeDXILStepTerminator) { + RecordType != DebugShaderModifierRecordTypeDXILStepRet) { addDebugEntryValue(BC, V); - IRBuilder<> &B = BC.Builder; Value *VO = BC.HlslOP->GetU32Const(ValueOrdinal << 16); @@ -850,99 +986,199 @@ void DxilDebugInstrumentation::addStepEntryForType( } } -void DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext &BC, - StoreInst *Inst) { +std::optional +DxilDebugInstrumentation::addStoreStepDebugEntry(BuilderContext *BC, + StoreInst *Inst) { std::uint32_t ValueOrdinalBase; std::uint32_t UnusedValueOrdinalSize; llvm::Value *ValueOrdinalIndex; if 
(!pix_dxil::PixAllocaRegWrite::FromInst(Inst, &ValueOrdinalBase, &UnusedValueOrdinalSize, &ValueOrdinalIndex)) { - return; + return std::nullopt; } std::uint32_t InstNum; if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { - return; + return std::nullopt; } if (PIXPassHelpers::IsAllocateRayQueryInstruction(Inst->getValueOperand())) { - return; + return std::nullopt; } - addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), ValueOrdinalBase, - ValueOrdinalIndex); + auto Type = addStepDebugEntryValue(BC, InstNum, Inst->getValueOperand(), + ValueOrdinalBase, ValueOrdinalIndex); + if (Type) { + if (Instruction *ValueAsInst = + dyn_cast(Inst->getValueOperand())) { + uint32_t RegNum = 0; + if (pix_dxil::PixDxilReg::FromInst(ValueAsInst, &RegNum)) { + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = *Type; + ret.RegisterNumber = RegNum; + ret.AllocaBase = ValueOrdinalBase; + ret.AllocaWriteIndex = ValueOrdinalIndex; + return ret; + } + } else if (Constant *ValueAsConst = + dyn_cast(Inst->getValueOperand())) { + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = *Type; + ret.AllocaBase = ValueOrdinalBase; + ret.AllocaWriteIndex = ValueOrdinalIndex; + + switch (ValueAsConst->getType()->getTypeID()) { + case Type::HalfTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + ret.ConstantAllocaStoreValue = dyn_cast(ValueAsConst) + ->getValueAPF() + .bitcastToAPInt() + .getLimitedValue(); + break; + case Type::IntegerTyID: + ret.ConstantAllocaStoreValue = + dyn_cast(ValueAsConst)->getLimitedValue(); + break; + default: + return std::nullopt; + } + return ret; + } + } + return std::nullopt; } -void DxilDebugInstrumentation::addStepDebugEntry(BuilderContext &BC, - Instruction *Inst) { - if (Inst->getOpcode() == Instruction::OtherOps::PHI) { - return; - } +std::optional +DxilDebugInstrumentation::addStepDebugEntry(BuilderContext *BC, + Instruction *Inst) { if 
(PIXPassHelpers::IsAllocateRayQueryInstruction(Inst)) { - return; + return std::nullopt; } if (auto *St = llvm::dyn_cast(Inst)) { - addStoreStepDebugEntry(BC, St); - return; + return addStoreStepDebugEntry(BC, St); } std::uint32_t InstNum; if (!pix_dxil::PixDxilInstNum::FromInst(Inst, &InstNum)) { - return; + return std::nullopt; + } + + if (auto *Ld = llvm::dyn_cast(Inst)) { + if (llvm::isa(Ld->getPointerOperand())) { + auto *constant = llvm::cast(Ld->getPointerOperand()); + if (constant->getOpcode() == Instruction::GetElementPtr) { + PIXPassHelpers::ScopedInstruction asInstr(constant->getAsInstruction()); + auto *GEP = llvm::cast(asInstr.Get()); + if (GEP->getPointerOperand()->getName().equals("dx.nothing.a")) { + // These debug-only loads are interesting as instructions to + // step through where otherwise no step might exist for the + // given HLSL lines, so we include them in the instrumentation: + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = DebugShaderModifierRecordTypeDXILStepVoid; + return ret; + } + } + } } std::uint32_t RegNum; if (!pix_dxil::PixDxilReg::FromInst(Inst, &RegNum)) { - if (Inst->getOpcode() == Instruction::Ret) - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepTerminator, - BC, InstNum, nullptr, 0, 0); - return; + if (Inst->getOpcode() == Instruction::Ret) { + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepRet, *BC, + InstNum, nullptr, 0, 0); + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = DebugShaderModifierRecordTypeDXILStepRet; + return ret; + } else if (Inst->isTerminator()) { + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepVoid, + *BC, InstNum, nullptr, 0, 0); + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = DebugShaderModifierRecordTypeDXILStepVoid; + return ret; + } + return std::nullopt; } - addStepDebugEntryValue(BC, 
InstNum, Inst, RegNum, BC.Builder.getInt32(0)); + auto Type = addStepDebugEntryValue(BC, InstNum, Inst, RegNum, + BC ? BC->Builder.getInt32(0) : nullptr); + if (Type) { + InstructionAndType ret{}; + ret.Inst = Inst; + ret.InstructionOrdinal = InstNum; + ret.Type = *Type; + ret.RegisterNumber = RegNum; + return ret; + } + return std::nullopt; } -void DxilDebugInstrumentation::addStepDebugEntryValue( - BuilderContext &BC, std::uint32_t InstNum, Value *V, - std::uint32_t ValueOrdinal, Value *ValueOrdinalIndex) { +std::optional +DxilDebugInstrumentation::addStepDebugEntryValue(BuilderContext *BC, + std::uint32_t InstNum, + Value *V, + std::uint32_t ValueOrdinal, + Value *ValueOrdinalIndex) { const Type::TypeID ID = V->getType()->getTypeID(); switch (ID) { case Type::TypeID::StructTyID: case Type::TypeID::VoidTyID: - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepVoid, BC, - InstNum, V, ValueOrdinal, ValueOrdinalIndex); - break; + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepVoid, *BC, + InstNum, V, ValueOrdinal, ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepVoid; case Type::TypeID::FloatTyID: - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepFloat, BC, - InstNum, V, ValueOrdinal, ValueOrdinalIndex); - break; + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepFloat, + *BC, InstNum, V, ValueOrdinal, + ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepFloat; case Type::TypeID::IntegerTyID: if (V->getType()->getIntegerBitWidth() == 64) { - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepUint64, - BC, InstNum, V, ValueOrdinal, - ValueOrdinalIndex); + if (BC != nullptr) + addStepEntryForType( + DebugShaderModifierRecordTypeDXILStepUint64, *BC, InstNum, V, + ValueOrdinal, ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepUint64; } else { - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepUint32, - BC, InstNum, V, ValueOrdinal, - 
ValueOrdinalIndex); + if (BC != nullptr) + addStepEntryForType( + DebugShaderModifierRecordTypeDXILStepUint32, *BC, InstNum, V, + ValueOrdinal, ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepUint32; } - break; case Type::TypeID::DoubleTyID: - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepDouble, BC, - InstNum, V, ValueOrdinal, ValueOrdinalIndex); - break; + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepDouble, + *BC, InstNum, V, ValueOrdinal, + ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepDouble; case Type::TypeID::HalfTyID: - addStepEntryForType(DebugShaderModifierRecordTypeDXILStepFloat, BC, - InstNum, V, ValueOrdinal, ValueOrdinalIndex); - break; + if (BC != nullptr) + addStepEntryForType(DebugShaderModifierRecordTypeDXILStepFloat, + *BC, InstNum, V, ValueOrdinal, + ValueOrdinalIndex); + return DebugShaderModifierRecordTypeDXILStepFloat; case Type::TypeID::PointerTyID: // Skip pointer calculation instructions. They aren't particularly // meaningful to the user (being a mere implementation detail for lookup - // tables, etc.), and their type is problematic from a UI point of view. The - // subsequent instructions that dereference the pointer will be properly - // instrumented and show the (meaningful) retrieved value. + // tables, etc.), and their type is problematic from a UI point of view. + // The subsequent instructions that dereference the pointer will be + // properly instrumented and show the (meaningful) retrieved value. break; case Type::TypeID::VectorTyID: // Shows up in "insertelement" in raygen shader? 
@@ -957,11 +1193,16 @@ void DxilDebugInstrumentation::addStepDebugEntryValue( case Type::TypeID::PPC_FP128TyID: assert(false); } + return std::nullopt; } bool DxilDebugInstrumentation::runOnModule(Module &M) { DxilModule &DM = M.GetOrCreateDxilModule(); + // There is no point running this pass if it can't return its report: + if (OSOverride == nullptr) + return false; + auto ShaderModel = DM.GetShaderModel(); auto shaderKind = ShaderModel->GetKind(); @@ -981,10 +1222,159 @@ bool DxilDebugInstrumentation::runOnModule(Module &M) { return modified; } +struct RecordTypeDatum { + DebugShaderModifierRecordType Type; + uint32_t PayloadSize; + const char *AsString; +}; + +static const RecordTypeDatum RecordTypeData[] = { + {DebugShaderModifierRecordTypeDXILStepRet, 0, "r"}, + {DebugShaderModifierRecordTypeDXILStepVoid, 0, "v"}, + {DebugShaderModifierRecordTypeDXILStepFloat, 4, "f"}, + {DebugShaderModifierRecordTypeDXILStepUint32, 4, "3"}, + {DebugShaderModifierRecordTypeDXILStepUint64, 8, "6"}, + {DebugShaderModifierRecordTypeDXILStepDouble, 8, "d"}}; + +std::optional +FindDatum(DebugShaderModifierRecordType RecordType) { + for (auto const &datum : RecordTypeData) { + if (datum.Type == RecordType) { + return &datum; + } + } + return std::nullopt; +} + +uint32_t DxilDebugInstrumentation::CountBlockPayloadBytes( + std::vector const &IsAndTs) { + uint32_t count = 0; + for (auto const &IandT : IsAndTs) { + auto datum = FindDatum(IandT.ValueType); + if (datum) + count += (*datum)->PayloadSize; + } + return count; +} + +const char *TypeString(InstructionAndType const &IandT) { + auto datum = FindDatum(IandT.Type); + if (datum) + return (*datum)->AsString; + assert(false); + return "v"; +} + +Instruction *FindFirstNonPhiInstruction(Instruction *I) { + while (llvm::isa(I)) + I = I->getNextNode(); + return I; +} + +// This function reports a textual representation of the format +// of the debug data that will be output by the instructions +// added by this pass. 
+// The string has one or more lines of the exemplary form +// Block#3:5,f,22,a;7,f,22,s,20;9,f,22,s,20;10,f,23,a;12,f,23,s,21; +// The integer after the Block# is the first instruction number in the +// block. +// Instructions are delimited by ; The fields within the instruction +// (delimited by ,) are, in order: +// -instruction ordinal +// -data type (r=ret, v=void, f=float, 3=int32, 6=int64, d=double) +// -scalar register number +// -alloca/scalar indicator: +// r == ret instruction +// a == scalar is being created and assigned a value, and that +// value is in the debug output. +// s == Existing scalar is being assigned via static alloca index. +// Index is appended to this instruction record. No +// corresponding data in the debug output. +// d == A dynamic index added to the static base index. Base index +// is appended to this record. The corresponding debug entry is +// the dynamic index into that alloca. +// v == A void terminator or other void-valued instruction. No +// corresponding data in the debug output. +// If indicator is "s", a string of the form [base+index] for the alloca +// store location. +// If indicator is "d", a single integer denoting the base for the alloca +// store.
+DxilDebugInstrumentation::BlockInstrumentationData +DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock(BasicBlock &BB, + OP *HlslOP) { + BlockInstrumentationData ret{}; + auto &Is = BB.getInstList(); + *OSOverride << "Block#"; + bool FoundFirstInstruction = false; + for (auto &Inst : Is) { + if (!FoundFirstInstruction) { + std::uint32_t InstNum; + if (pix_dxil::PixDxilInstNum::FromInst(&Inst, &InstNum)) { + *OSOverride << std::to_string(InstNum) << ":"; + ret.FirstInstructionOrdinalInBlock = InstNum; + FoundFirstInstruction = true; + } + } + auto IandT = addStepDebugEntry(nullptr, &Inst); + if (IandT) { + InstructionToInstrument DebugOutputForThisInstruction{}; + DebugOutputForThisInstruction.ValueType = IandT->Type; + auto *InsertionPoint = FindFirstNonPhiInstruction(&Inst); + if (InsertionPoint->isTerminator() || llvm::isa(Inst)) + DebugOutputForThisInstruction + .InstructionBeforeWhichToAddInstrumentation = InsertionPoint; + else + DebugOutputForThisInstruction + .InstructionAfterWhichToAddInstrumentation = InsertionPoint; + + const char *IndexingToken = nullptr; + std::optional RegisterOrStaticIndex; + if (IandT->Type == DebugShaderModifierRecordTypeDXILStepRet) { + IndexingToken = "r"; + } else if (IandT->Type == DebugShaderModifierRecordTypeDXILStepVoid) { + IndexingToken = "v"; // void instruction, no debug output required + } else if (IandT->AllocaWriteIndex != nullptr) { + if (ConstantInt *IndexAsConstant = + dyn_cast(IandT->AllocaWriteIndex)) { + RegisterOrStaticIndex = + std::to_string(IandT->AllocaBase) + "+" + + std::to_string(IndexAsConstant->getLimitedValue()); + IndexingToken = "s"; // static indexing, no debug output required + } else { + IndexingToken = "d"; // dynamic indexing + RegisterOrStaticIndex = std::to_string(IandT->AllocaBase); + DebugOutputForThisInstruction.ValueToWriteToDebugMemory = + IandT->AllocaWriteIndex; + } + } else { + IndexingToken = "a"; // meaning an SSA assignment + // todo: Can SSA Values be assigned a 
literal constant? + DebugOutputForThisInstruction.ValueToWriteToDebugMemory = IandT->Inst; + } + + *OSOverride << std::to_string(IandT->InstructionOrdinal) << "," + << TypeString(*IandT) << "," + << std::to_string(IandT->RegisterNumber) << "," + << IndexingToken; + if (RegisterOrStaticIndex) { + *OSOverride << "," << *RegisterOrStaticIndex; + } + if (IandT->ConstantAllocaStoreValue) { + *OSOverride << "," << std::to_string(*IandT->ConstantAllocaStoreValue); + } + *OSOverride << ";"; + if (DebugOutputForThisInstruction.ValueToWriteToDebugMemory) + ret.Instructions.push_back(std::move(DebugOutputForThisInstruction)); + } + } + *OSOverride << "\n"; + return ret; +} + bool DxilDebugInstrumentation::RunOnFunction(Module &M, DxilModule &DM, - llvm::Function *entryFunction) { + llvm::Function *function) { DXIL::ShaderKind shaderKind = - PIXPassHelpers::GetFunctionShaderKind(DM, entryFunction); + PIXPassHelpers::GetFunctionShaderKind(DM, function); switch (shaderKind) { case DXIL::ShaderKind::Amplification: @@ -1005,34 +1395,7 @@ bool DxilDebugInstrumentation::RunOnFunction(Module &M, DxilModule &DM, return false; } - // First record pointers to all instructions in the function: - std::vector AllInstructions; - for (inst_iterator I = inst_begin(entryFunction), E = inst_end(entryFunction); - I != E; ++I) { - std::uint32_t InstructionNumber; - if (pix_dxil::PixDxilInstNum::FromInst(&*I, &InstructionNumber)) { - if (InstructionNumber < m_FirstInstruction || - InstructionNumber >= m_LastInstruction) - continue; - AllInstructions.push_back(&*I); - } - } - - // Branchless instrumentation requires taking care of a few things: - // -Each invocation of the shader will be either of interest or not of - // interest - // -If of interest, the offset into the output UAV will be as expected - // -If not, the offset is forced to (UAVsize) - (Small Amount), and that - // output is ignored by the CPU-side code. - // -The invocation of interest may overflow the UAV. 
This is handled by taking - // the modulus of the - // output index. Overflow is then detected on the CPU side by checking for - // the presence of a canary value at (UAVSize) - (Small Amount) * 2 (which is - // actually a conservative definition of overflow). - // - - Instruction *firstInsertionPt = - dxilutil::FirstNonAllocaInsertionPt(entryFunction); + Instruction *firstInsertionPt = dxilutil::FirstNonAllocaInsertionPt(function); IRBuilder<> Builder(firstInsertionPt); LLVMContext &Ctx = M.getContext(); @@ -1061,100 +1424,81 @@ bool DxilDebugInstrumentation::RunOnFunction(Module &M, DxilModule &DM, values.UAVHandle = PIXPassHelpers::CreateUAV(DM, Builder, UAVRegisterId, "PIX_DebugUAV_Handle"); - values.CounterOffset = BC.HlslOP->GetU32Const(UAVDumpingGroundOffset() + - CounterOffsetBeyondUsefulData); auto SystemValues = addRequiredSystemValues(BC, shaderKind); addInvocationSelectionProlog(BC, SystemValues, shaderKind); + determineLimitANDAndInitializeCounter(BC); addInvocationStartMarker(BC); - // Explicitly name new blocks in order to provide stable names for testing - // purposes - int NewBlockCounter = 0; + // Instrument original instructions: + for (auto &BB : function->getBasicBlockList()) { + if (std::find(values.AddedBlocksToIgnoreForInstrumentation.begin(), + values.AddedBlocksToIgnoreForInstrumentation.end(), + &BB) == values.AddedBlocksToIgnoreForInstrumentation.end()) { + auto BlockInstrumentation = + FindInstrumentableInstructionsInBlock(BB, BC.HlslOP); + if (BlockInstrumentation.FirstInstructionOrdinalInBlock < + m_FirstInstruction || + BlockInstrumentation.FirstInstructionOrdinalInBlock >= + m_LastInstruction) + continue; + uint32_t BlockPayloadBytes = + CountBlockPayloadBytes(BlockInstrumentation.Instructions); + // If the block has no instructions which require debug output, + // we will still write an empty block header at the end of that + // block (i.e. 
before the terminator) so that the instrumentation + // at least indicates that flow control went through the block. + Instruction *BlockInstrumentationStart = (BB).getTerminator(); + if (!BlockInstrumentation.Instructions.empty()) { + auto const &First = BlockInstrumentation.Instructions[0]; + if (First.InstructionAfterWhichToAddInstrumentation != nullptr) + BlockInstrumentationStart = + First.InstructionAfterWhichToAddInstrumentation; + else if (First.InstructionBeforeWhichToAddInstrumentation != nullptr) + BlockInstrumentationStart = + First.InstructionBeforeWhichToAddInstrumentation; + else { + assert(false); + continue; + } + } + IRBuilder<> Builder(BlockInstrumentationStart); + BuilderContext BCForBlock{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; - auto &Blocks = entryFunction->getBasicBlockList(); - for (auto &CurrentBlock : Blocks) { - struct ValueAndPhi { - Value *Val; - PHINode *Phi; - unsigned Index; - }; - - std::map> InsertableEdges; - auto &Is = CurrentBlock.getInstList(); - for (auto &Inst : Is) { - if (Inst.getOpcode() != Instruction::OtherOps::PHI) { + DebugShaderModifierRecordDXILBlock step = {}; + auto FullRecordSize = + static_cast(sizeof(step) + BlockPayloadBytes); + if (FullRecordSize >= (m_UAVSize / 4) - 1) { + *OSOverride << "StaticOverflow:" << std::to_string(FullRecordSize) + << "\n"; break; } - PHINode &PN = llvm::cast(Inst); - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - BasicBlock *PhiBB = PN.getIncomingBlock(i); - Value *PhiVal = PN.getIncomingValue(i); - InsertableEdges[PhiBB].push_back({PhiVal, &PN, i}); - } - } - - for (auto &InsertableEdge : InsertableEdges) { - auto *NewBlock = BasicBlock::Create( - Ctx, "PIXDebug" + std::to_string(NewBlockCounter++), - InsertableEdge.first->getParent()); - IRBuilder<> Builder(NewBlock); - - auto *PreviousBlock = InsertableEdge.first; - - // Modify all successor operands of the terminator in the previous block - // that match the current block to point to the new block: - 
TerminatorInst *terminator = PreviousBlock->getTerminator(); - unsigned NumSuccessors = terminator->getNumSuccessors(); - for (unsigned SuccessorIndex = 0; SuccessorIndex < NumSuccessors; - ++SuccessorIndex) { - auto *CurrentSuccessor = terminator->getSuccessor(SuccessorIndex); - if (CurrentSuccessor == &CurrentBlock) { - terminator->setSuccessor(SuccessorIndex, NewBlock); - } - } - - // Modify the Phis and add debug instrumentation - for (auto &ValueNPhi : InsertableEdge.second) { - // Modify the phi to refer to the new block: - ValueNPhi.Phi->setIncomingBlock(ValueNPhi.Index, NewBlock); - - // Add instrumentation to the new block - std::uint32_t RegNum; - if (!pix_dxil::PixDxilReg::FromInst(ValueNPhi.Phi, &RegNum)) { + reserveDebugEntrySpace(BCForBlock, FullRecordSize); + step.Header.Details.CountOfInstructions = + static_cast(BlockInstrumentation.Instructions.size()); + step.Header.Details.Type = + static_cast(DebugShaderModifierRecordTypeDXILStepBlock); + addDebugEntryValue(BCForBlock, + BCForBlock.HlslOP->GetU32Const(step.Header.u32Header)); + addDebugEntryValue(BCForBlock, values.InvocationId); + addDebugEntryValue( + BCForBlock, BCForBlock.HlslOP->GetU32Const( + BlockInstrumentation.FirstInstructionOrdinalInBlock)); + for (auto &Inst : BlockInstrumentation.Instructions) { + Instruction *BuilderInstruction; + if (Inst.InstructionAfterWhichToAddInstrumentation != nullptr) + BuilderInstruction = + Inst.InstructionAfterWhichToAddInstrumentation->getNextNode(); + else if (Inst.InstructionBeforeWhichToAddInstrumentation != nullptr) + BuilderInstruction = Inst.InstructionBeforeWhichToAddInstrumentation; + else { + assert(false); continue; } - - std::uint32_t InstNum; - if (!pix_dxil::PixDxilInstNum::FromInst(ValueNPhi.Phi, &InstNum)) { - continue; - } - if (InstNum < m_FirstInstruction || InstNum >= m_LastInstruction) - continue; - - BuilderContext BC{M, DM, Ctx, HlslOP, Builder}; - addStepDebugEntryValue(BC, InstNum, ValueNPhi.Val, RegNum, - 
BC.Builder.getInt32(0)); + IRBuilder<> Builder(BuilderInstruction); + BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; + addDebugEntryValue(BC2, Inst.ValueToWriteToDebugMemory); } - - // Add a branch to the new block to point to the current block - Builder.CreateBr(&CurrentBlock); - } - } - - // Instrument original instructions: - for (auto &Inst : AllInstructions) { - // Instrumentation goes after the instruction if it is not a terminator. - // Otherwise, Instrumentation goes prior to the instruction. - if (!Inst->isTerminator()) { - IRBuilder<> Builder(Inst->getNextNode()); - BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; - addStepDebugEntry(BC2, Inst); - } else { - // Insert before this instruction - IRBuilder<> Builder(Inst); - BuilderContext BC2{BC.M, BC.DM, BC.Ctx, BC.HlslOP, Builder}; - addStepDebugEntry(BC2, Inst); } } diff --git a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp index 7fd4bb152..f68e2082b 100644 --- a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp +++ b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp @@ -33,7 +33,7 @@ void pix_dxil::PixDxilInstNum::AddMD(llvm::LLVMContext &Ctx, llvm::ConstantAsMetadata::get(B.getInt32(InstNum))})); } -bool pix_dxil::PixDxilInstNum::FromInst(llvm::Instruction *pI, +bool pix_dxil::PixDxilInstNum::FromInst(llvm::Instruction const *pI, std::uint32_t *pInstNum) { *pInstNum = 0; @@ -73,7 +73,7 @@ void pix_dxil::PixDxilReg::AddMD(llvm::LLVMContext &Ctx, llvm::Instruction *pI, llvm::ConstantAsMetadata::get(B.getInt32(RegNum))})); } -bool pix_dxil::PixDxilReg::FromInst(llvm::Instruction *pI, +bool pix_dxil::PixDxilReg::FromInst(llvm::Instruction const *pI, std::uint32_t *pRegNum) { *pRegNum = 0; @@ -141,7 +141,7 @@ void pix_dxil::PixAllocaReg::AddMD(llvm::LLVMContext &Ctx, llvm::ConstantAsMetadata::get(B.getInt32(Count))})); } -bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst *pAlloca, +bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst 
const *pAlloca, std::uint32_t *pRegBase, std::uint32_t *pRegSize) { *pRegBase = 0; diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp index a093b2fc2..592aca921 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.cpp +++ b/lib/DxilPIXPasses/PixPassHelpers.cpp @@ -37,9 +37,10 @@ using namespace llvm; using namespace hlsl; namespace PIXPassHelpers { -bool IsAllocateRayQueryInstruction(llvm::Value *Val) { +bool IsAllocateRayQueryInstruction(llvm::Value const *Val) { if (Val != nullptr) { - if (llvm::Instruction *Inst = llvm::dyn_cast(Val)) { + if (llvm::Instruction const *Inst = + llvm::dyn_cast(Val)) { return hlsl::OP::IsDxilOpFuncCallInst(Inst, hlsl::OP::OpCode::AllocateRayQuery); } diff --git a/lib/DxilPIXPasses/PixPassHelpers.h b/lib/DxilPIXPasses/PixPassHelpers.h index 7d94a3142..406bc086d 100644 --- a/lib/DxilPIXPasses/PixPassHelpers.h +++ b/lib/DxilPIXPasses/PixPassHelpers.h @@ -20,7 +20,17 @@ #endif namespace PIXPassHelpers { -bool IsAllocateRayQueryInstruction(llvm::Value *Val); + +class ScopedInstruction { + llvm::Instruction *m_Instruction; + +public: + ScopedInstruction(llvm::Instruction *I) : m_Instruction(I) {} + ~ScopedInstruction() { delete m_Instruction; } + llvm::Instruction *Get() const { return m_Instruction; } +}; + +bool IsAllocateRayQueryInstruction(llvm::Value const *Val); llvm::CallInst *CreateUAV(hlsl::DxilModule &DM, llvm::IRBuilder<> &Builder, unsigned int registerId, const char *name); llvm::CallInst *CreateHandleForResource(hlsl::DxilModule &DM, diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl index 8c29a59ef..664778a4c 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugBasic.hlsl @@ -1,4 +1,4 @@ -// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation | %FileCheck %s +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,UAVSize=128 | 
%FileCheck %s // Check that the basic starting header is present: @@ -10,17 +10,18 @@ // CHECK: %CompareToX = icmp eq i32 %XIndex, 0 // CHECK: %CompareToY = icmp eq i32 %YIndex, 0 // CHECK: %ComparePos = and i1 %CompareToX, %CompareToY -// CHECK: %OffsetMultiplicand = zext i1 %ComparePos to i32 -// CHECK: %ComplementOfMultiplicand = sub i32 1, %OffsetMultiplicand -// CHECK: %OffsetAddend = mul i32 983040, %ComplementOfMultiplicand -// CHECK: %IncrementForThisInvocation = mul i32 8, %OffsetMultiplicand -// Check the first instruction was instrumented: -// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0 -// CHECK: %MaskedForUAVLimit = and i32 %UAVIncResult, 983039 -// CHECK: %MultipliedForInterest = mul i32 %MaskedForUAVLimit, %OffsetMultiplicand -// CHECK: %AddedForInterest = add i32 %MultipliedForInterest, %OffsetAddend -// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest + +// Check for branches-for-interest and AND value and counter location for a UAV size of 128 +// CHECK: br i1 %ComparePos, label %PIXInterestingBlock, label %PIXNonInterestingBlock +// CHECK: %PIXOffsetOr = phi i32 [ 0, %PIXInterestingBlock ], [ 64, %PIXNonInterestingBlock ] +// CHECK: %PIXCounterLocation = phi i32 [ 63, %PIXInterestingBlock ], [ 127, %PIXNonInterestingBlock ] + +// Check the first block header was emitted: (increment, AND + OR) +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0 +// CHECK: and i32 +// CHECK: or i32 + [RootSignature("")] diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugFlowControl.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugFlowControl.hlsl index 38b2d018e..1970404dd 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugFlowControl.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugFlowControl.hlsl @@ -2,15 +2,31 @@ // Check that flow control constructs don't break the instrumentation. 
-// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0 +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle, i32 0 -// CHECK: %MaskedForUAVLimit3 = and i32 %UAVIncResult2, 983039 +// There should be several blocks that have instrumentation: -// CHECK: %MultipliedForInterest4 = mul i32 %MaskedForUAVLimit3, %OffsetMultiplicand +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle -// CHECK: %AddedForInterest5 = add i32 %MultipliedForInterest4, %OffsetAddend +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle + +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle + +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle + +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle + +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle + +// CHECK: ; preds = +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle -// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, i32 %AddedForInterest5 struct VS_OUTPUT_ENV { diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugInstrumentRet.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugInstrumentRet.hlsl deleted file mode 100644 index 536d5b176..000000000 --- a/tools/clang/test/HLSLFileCheck/pix/DebugInstrumentRet.hlsl +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %dxc -T ps_6_3 %s | %opt -S -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,parameter0=10,parameter1=20,parameter2=30 | %FileCheck %s - - - - -// The ret's instruction number should be 4 (the last integer on this line): -// CHECK: call void @dx.op.bufferStore.i32(i32 69, 
%dx.types.Handle %PIX_DebugUAV_Handle, i32 {{.*}}, i32 undef, i32 4 -// But we'll check that instruction number: -// CHECK: ret void, !pix-dxil-inst-num [[RetInstNum:![0-9]+]] -// CHECK-DAG: [[RetInstNum]] = !{i32 3, i32 4} - - -float4 main() : SV_Target { - return float4(0, 0, 0, 0); -} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugLimitedInstructionOverrides.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugLimitedInstructionOverrides.hlsl index 037049b62..7ab604134 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugLimitedInstructionOverrides.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugLimitedInstructionOverrides.hlsl @@ -1,19 +1,19 @@ // The PIX debug instrumentation pass takes optional arguments that limit the range of instruction numbers that will be instrumented. // (This is to cope with extremely large shaders, the instrumentation of which will break, either by out-of-memory or by TDRing when run.) -// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,FirstInstruction=6,LastInstruction=9 | %FileCheck %s +// RUN: %dxc -EFlowControlPS -Tps_6_0 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,FirstInstruction=4,LastInstruction=20 | %FileCheck %s -// The only instrumented instructions should have instruction numbers in the range [6,9): -// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 6 -// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 7 -// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle, {{.*}}, i32 undef, i32 8 +// The only instrumented blocks should have instruction numbers in the range [4,20): -// Two more stores to finish off the instrumentation for instruction #8: -// CHECK: call void @dx.op.bufferStore.f32 -// CHECK: call void @dx.op.bufferStore.i32 +// Skip 
over the preamble +// CHECK: switch i32 +// +// Now there should be exactly two more instrumented blocks (two increments of the counter UAV entry) +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle +// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle -// Then no more instrumentation at all: -// CHECK-NOT: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %PIX_DebugUAV_Handle +// Then no more instrumentation at all (i.e. no more increments of the counter UAV entry): +// CHECK-NOT: call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_DebugUAV_Handle struct VS_OUTPUT_ENV { float4 Pos : SV_Position; diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugPhisFor.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugPhisFor.hlsl deleted file mode 100644 index eafb5de7b..000000000 --- a/tools/clang/test/HLSLFileCheck/pix/DebugPhisFor.hlsl +++ /dev/null @@ -1,59 +0,0 @@ -// RUN: %dxc -EForLoopPS -Tps_6_0 %s -Od | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation | %FileCheck %s - -// Ensure that the pass added at the begining of the for body: -// CHECK: br label %PIXDebug -// CHECK: br label %PIXDebug -// CHECK: br label %PIXDebug - -// Followed by lots of new pix debug blocks: - -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - - -struct VS_OUTPUT_ENV { - float4 Pos : SV_Position; - float2 Tex : TEXCOORD0; -}; - -uint i32; - -float4 ForLoopPS(VS_OUTPUT_ENV input) : SV_Target { - float4 ret = float4(0, 0, 0, 0); - for (uint i = 0; i < abs(input.Tex.x * 200); ++i) { - 
ret.x += (float)i32; - if (i + i32 == 0) { - break; - } - ret.y += (float)i32; - if (i + i32 == 1) { - continue; - } - ret.z += (float)i32; - if (i + i32 == 2) { - break; - } - ret.w += (float)i32; - if (i + i32 == 3) { - continue; - } - } - return ret; -} diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugPhisIfElse.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugPhisIfElse.hlsl deleted file mode 100644 index d15a0f6b5..000000000 --- a/tools/clang/test/HLSLFileCheck/pix/DebugPhisIfElse.hlsl +++ /dev/null @@ -1,27 +0,0 @@ -// RUN: %dxc -EFlowControlPS -Tps_6_0 %s -Od | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation | %FileCheck %s - -// Ensure that the pass added a block at the end of this if/else: -// CHECK: br label %PIXDebug -// CHECK: br label %PIXDebug - -// Check that block 0 emits some debug info and returns where we expect: -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// Check that block 1 emits some debug info and returns where we expect: -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - - -float4 FlowControlPS(in uint value : value ) : SV_Target -{ - float4 ret = float4(0, 0, 0, 0); - if (value > 1) { - ret = float4(0, 0, 0, 2); - } else { - ret = float4(0, 0, 0, 1); - } - return ret; -} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugPhisSwicth.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugPhisSwicth.hlsl deleted file mode 100644 index 8d08df97e..000000000 --- a/tools/clang/test/HLSLFileCheck/pix/DebugPhisSwicth.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -// RUN: %dxc -EFlowControlPS -Tps_6_0 %s -Od | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation | %FileCheck %s - -// Check for a branch to a new block for each case: -// CHECK: br label %PIXDebug -// CHECK: br label %PIXDebug -// CHECK: br label %PIXDebug - -// Check that three PIXDebug blocks emit some debug info and returns where we 
expect: -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// Check that three PIXDebug blocks emit some debug info and returns where we expect: -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - -// Check that three PIXDebug blocks emit some debug info and returns where we expect: -// CHECK: PIXDebug -// CHECK: call i32 @dx.op.atomicBinOp.i32(i32 78 -// CHECK: br label - - -float4 FlowControlPS(in uint value : value ) : SV_Target -{ - float4 ret = float4(0, 0, 0, 0); - switch (value) - { - case 0: - ret = float4(1, 0, 0, 0); - break; - case 1: - ret = float4(2, 0, 0, 0); - break; - default: - ret = float4(3, 0, 0, 0); - break; - } - return ret; -} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugUAVSize.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugUAVSize.hlsl index 5a446dadb..433dca158 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugUAVSize.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugUAVSize.hlsl @@ -1,11 +1,10 @@ -// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,UAVSize=100000 | %FileCheck %s +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-debug-instrumentation,UAVSize=1024 | %FileCheck %s -// Check that the UAV size is reflected in the instrumentation. (Should be passed-in size - 64k) -// (The offset here is the "dumping ground" for non-interesting invocations) -// 100,000 - 65.536 = 34,464 - -// CHECK: %OffsetAddend = mul i32 34464, %ComplementOfMultiplicand +// Check that the UAV size is reflected in the instrumentation. 
+// The AND should be (1024/4-1), and the or should be 1024/2: +// CHECK: %PIXOffsetOr = phi i32 [ 0, %PIXInterestingBlock ], [ 512, %PIXNonInterestingBlock ] +// CHECK: and i32 {{.*}}, 255 [RootSignature("")] float4 main() : SV_Target { diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugVSParameters.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugVSParameters.hlsl index 628949a9e..6b15cdf25 100644 --- a/tools/clang/test/HLSLFileCheck/pix/DebugVSParameters.hlsl +++ b/tools/clang/test/HLSLFileCheck/pix/DebugVSParameters.hlsl @@ -8,7 +8,6 @@ // CHECK: %CompareToVertId = icmp eq i32 %VertId, 1 // CHECK: %CompareToInstanceId = icmp eq i32 %InstanceId, 2 // CHECK: %CompareBoth = and i1 %CompareToVertId, %CompareToInstanceId -// CHECK: %OffsetMultiplicand = zext i1 %CompareBoth to i32 [RootSignature("")] diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index 3d64eace9..a41b806ef 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -131,6 +131,9 @@ public: TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest) + TEST_METHOD(DebugInstrumentation_TextOutput) + TEST_METHOD(DebugInstrumentation_BlockReport) + dxc::DxcDllSupport m_dllSupport; VersionSupportInfo m_ver; @@ -188,6 +191,32 @@ public: std::move(pOptimizedModule), {}, Tokenize(outputText.c_str(), "\n")}; } + PassOutput RunDebugPass(IDxcBlob *dxil, int UAVSize = 1024 * 1024) { + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::vector Options; + Options.push_back(L"-opt-mod-passes"); + Options.push_back(L"-dxil-dbg-value-to-dbg-declare"); + Options.push_back(L"-dxil-annotate-with-virtual-regs"); + std::wstring debugArg = + L"-hlsl-dxil-debug-instrumentation,UAVSize=" + std::to_wstring(UAVSize); + Options.push_back(debugArg.c_str()); + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), 
&pOptimizedModule, &pText)); + + std::string outputText; + if (pText->GetBufferSize() != 0) { + outputText = reinterpret_cast(pText->GetBufferPointer()); + } + + return { + std::move(pOptimizedModule), {}, Tokenize(outputText.c_str(), "\n")}; + } + CComPtr FindModule(hlsl::DxilFourCC fourCC, IDxcBlob *pSource) { const UINT32 BC_C0DE = ((INT32)(INT8)'B' | (INT32)(INT8)'C' << 8 | (INT32)0xDEC0 << 16); // BC0xc0de in big endian @@ -2570,3 +2599,96 @@ void MyMiss(inout MyPayload payload) auto compiledLib = Compile(m_dllSupport, source, L"lib_6_6", {}); RunDxilPIXDXRInvocationsLog(compiledLib); } + +TEST_F(PixTest, DebugInstrumentation_TextOutput) { + + const char *source = R"x( +float4 main() : SV_Target { + return float4(0,0,0,0); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"ps_6_0", {}); + auto output = RunDebugPass(compiled, 8 /*ludicrously low UAV size limit*/); + bool foundStaticOverflow = false; + bool foundCounterOffset = false; + bool foundThreshold = false; + for (auto const &line : output.lines) { + if (line.find("StaticOverflow:12") != std::string::npos) + foundStaticOverflow = true; + if (line.find("InterestingCounterOffset:3") != std::string::npos) + foundCounterOffset = true; + if (line.find("OverflowThreshold:1") != std::string::npos) + foundThreshold = true; + } + VERIFY_IS_TRUE(foundStaticOverflow); +} + +TEST_F(PixTest, DebugInstrumentation_BlockReport) { + + const char *source = R"x( +RWStructuredBuffer UAV: register(u0); +float4 main() : SV_Target { + // basic int variable + int v = UAV[0]; + if(v == 0) + UAV[1] = v; + else + UAV[2] = v; + // float with indexed alloca + float f[2]; + f[0] = UAV[4]; + f[1] = UAV[5]; + if(v == 2) + f[0] = v; + else + f[1] = v; + float farray2[2]; + farray2[0] = UAV[4]; + farray2[1] = UAV[5]; + if(v == 4) + farray2[0] = v; + else + farray2[1] = v; + double d = UAV[8]; + int64_t i64 = UAV[9]; + return float4(d,i64,0,0); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"ps_6_0", {L"-Od"}); + auto 
output = RunDebugPass(compiled); + bool foundBlock = false; + bool foundRet = false; + bool foundUnnumberedVoidProllyADXNothing = false; + bool found32BitAssignment = false; + bool foundFloatAssignment = false; + bool foundDoubleAssignment = false; + bool found64BitAssignment = false; + bool found32BitAllocaStore = false; + for (auto const &line : output.lines) { + if (line.find("Block#") != std::string::npos) { + if (line.find("r,0,r;") != std::string::npos) + foundRet = true; + if (line.find("v,0,v;") != std::string::npos) + foundUnnumberedVoidProllyADXNothing = true; + if (line.find("3,3,a;") != std::string::npos) + found32BitAssignment = true; + if (line.find("d,13,a;") != std::string::npos) + foundDoubleAssignment = true; + if (line.find("f,19,a;") != std::string::npos) + foundFloatAssignment = true; + if (line.find("6,16,a;") != std::string::npos) + found64BitAssignment = true; + if (line.find("3,3,s,2+0;") != std::string::npos) + found32BitAllocaStore = true; + foundBlock = true; + } + } + VERIFY_IS_TRUE(foundBlock); + VERIFY_IS_TRUE(foundRet); + VERIFY_IS_TRUE(foundUnnumberedVoidProllyADXNothing); + VERIFY_IS_TRUE(found32BitAssignment); + VERIFY_IS_TRUE(found64BitAssignment); + VERIFY_IS_TRUE(foundFloatAssignment); + VERIFY_IS_TRUE(foundDoubleAssignment); + VERIFY_IS_TRUE(found32BitAllocaStore); +}