diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h
index 907549e27..4ba2fcf24 100644
--- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h
+++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h
@@ -26,6 +26,7 @@ ModulePass *createDxilForceEarlyZPass();
 ModulePass *createDxilDebugInstrumentationPass();
 ModulePass *createDxilShaderAccessTrackingPass();
 ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass();
+ModulePass *createDxilPIXDXRInvocationsLogPass();
 
 void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&);
 void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry&);
@@ -38,5 +39,6 @@ void initializeDxilForceEarlyZPass(llvm::PassRegistry&);
 void initializeDxilDebugInstrumentationPass(llvm::PassRegistry&);
 void initializeDxilShaderAccessTrackingPass(llvm::PassRegistry&);
 void initializeDxilPIXAddTidToAmplificationShaderPayloadPass(llvm::PassRegistry&);
+void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry&);
 
 }
diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt
index 20ad60d9b..c36d11d55 100644
--- a/lib/DxilPIXPasses/CMakeLists.txt
+++ b/lib/DxilPIXPasses/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMDxilPIXPasses
   DxilPIXVirtualRegisters.cpp
   PixPassHelpers.cpp
   DxilPIXAddTidToAmplificationShaderPayload.cpp
+  DxilPIXDXRInvocationsLog.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR
diff --git a/lib/DxilPIXPasses/DxilPIXDXRInvocationsLog.cpp b/lib/DxilPIXPasses/DxilPIXDXRInvocationsLog.cpp
new file mode 100644
index 000000000..e2415f8d8
--- /dev/null
+++ b/lib/DxilPIXPasses/DxilPIXDXRInvocationsLog.cpp
@@ -0,0 +1,251 @@
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DxilPIXDXRInvocationsLog.cpp                                              //
+// Copyright (C) Microsoft Corporation. All rights reserved.                 //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#include "dxc/DXIL/DxilFunctionProps.h"
+#include "dxc/DXIL/DxilOperations.h"
+#include "dxc/DXIL/DxilUtil.h"
+
+#include "dxc/DXIL/DxilInstructions.h"
+#include "dxc/DXIL/DxilModule.h"
+#include "dxc/DxilPIXPasses/DxilPIXPasses.h"
+
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+#include "PixPassHelpers.h"
+
+using namespace llvm;
+using namespace hlsl;
+using namespace PIXPassHelpers;
+
+// Module pass that instruments every exported intersection, any-hit,
+// closest-hit and miss shader so that each invocation appends one fixed-size
+// record (shader kind, dispatch index and ray state) to a log UAV.
+class DxilPIXDXRInvocationsLog : public ModulePass {
+  // Capacity of the log in records; supplied via the "maxNumEntriesInLog"
+  // pass option and used to clamp the write index.
+  uint64_t m_MaxNumEntriesInLog = 1;
+public:
+  static char ID;
+  DxilPIXDXRInvocationsLog() : ModulePass(ID) {}
+  StringRef getPassName() const override {
+    return "DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV";
+  }
+
+  void applyOptions(PassOptions O) override;
+  bool runOnModule(Module &M) override;
+};
+
+// Returns the shader kind recorded in the function's DxilFunctionProps, or the
+// module's shader-model kind when the function has no props attached.
+static DXIL::ShaderKind GetShaderKind(DxilModule const &DM, llvm::Function const *entryFunction)
+{
+  DXIL::ShaderKind ShaderKind = DXIL::ShaderKind::Invalid;
+  if (!DM.HasDxilFunctionProps(entryFunction)) {
+    auto ShaderModel = DM.GetShaderModel();
+    ShaderKind = ShaderModel->GetKind();
+  }
+  else {
+    auto const& Props = DM.GetDxilFunctionProps(entryFunction);
+    ShaderKind = Props.shaderKind;
+  }
+
+  return ShaderKind;
+}
+
+void DxilPIXDXRInvocationsLog::applyOptions(PassOptions O) {
+  GetPassOptionUInt64(O, "maxNumEntriesInLog", &m_MaxNumEntriesInLog, 1); // Use a silly default value. PIX should set a better value here.
+}
+
+// Inserts the logging code at the first non-alloca insertion point of each
+// instrumented entry function. Each record is 52 bytes:
+//   +0 shader kind, +4 DispatchRaysIndex.xyz, +16 WorldRayOrigin.xyz,
+//   +28 WorldRayDirection.xyz, +40 RayTMin, +44 RayTCurrent, +48 RayFlags.
+bool DxilPIXDXRInvocationsLog::runOnModule(Module &M) {
+
+  DxilModule &DM = M.GetOrCreateDxilModule();
+  LLVMContext &Ctx = M.getContext();
+  OP *HlslOP = DM.GetOP();
+
+  // Index of the last record in the log. A maxNumEntriesInLog of 0 would make
+  // "count - 1" wrap to UINT32_MAX and silently disable the clamp below, so
+  // treat it as a one-entry log.
+  const uint32_t MaxEntryIndex =
+      m_MaxNumEntriesInLog == 0 ? 0u : (uint32_t)m_MaxNumEntriesInLog - 1u;
+
+  for (auto entryFunction : DM.GetExportedFunctions()) {
+
+    DXIL::ShaderKind ShaderKind = GetShaderKind(DM, entryFunction);
+
+    switch (ShaderKind) {
+    case DXIL::ShaderKind::Intersection:
+    case DXIL::ShaderKind::AnyHit:
+    case DXIL::ShaderKind::ClosestHit:
+    case DXIL::ShaderKind::Miss:
+      break;
+
+    default:
+      continue;
+    }
+
+    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(entryFunction));
+
+    // Add the UAVs that we're going to write to
+    CallInst* HandleForCountUAV = PIXPassHelpers::CreateUAV(DM, Builder, /* registerID */ 0, "PIX_CountUAV_Handle");
+    CallInst* HandleForUAV = PIXPassHelpers::CreateUAV(DM, Builder, /* registerID */ 1, "PIX_UAV_Handle");
+
+    DM.ReEmitDxilResources();
+
+    auto DispatchRaysIndexOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::DispatchRaysIndex, Type::getInt32Ty(Ctx));
+    auto WorldRayOriginOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::WorldRayOrigin, Type::getFloatTy(Ctx));
+    auto WorldRayDirectionOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::WorldRayDirection, Type::getFloatTy(Ctx));
+    auto CurrentRayTFunc = HlslOP->GetOpFunc(DXIL::OpCode::RayTCurrent, Type::getFloatTy(Ctx));
+    auto MinRayTFunc = HlslOP->GetOpFunc(DXIL::OpCode::RayTMin, Type::getFloatTy(Ctx));
+    auto RayFlagsFunc = HlslOP->GetOpFunc(DXIL::OpCode::RayFlags, Type::getInt32Ty(Ctx));
+
+    auto *DispatchRaysIndexOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::DispatchRaysIndex);
+    auto *WorldRayOriginOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::WorldRayOrigin);
+    auto *WorldRayDirectionOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::WorldRayDirection);
+    auto *CurrentRayTOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::RayTCurrent);
+    auto *MinRayTOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::RayTMin);
+    auto *RayFlagsOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::RayFlags);
+
+    auto DispatchRaysX = Builder.CreateCall(DispatchRaysIndexOpFunc, { DispatchRaysIndexOpcode, HlslOP->GetI8Const(0) }, "DispatchRaysX");
+    auto DispatchRaysY = Builder.CreateCall(DispatchRaysIndexOpFunc, { DispatchRaysIndexOpcode, HlslOP->GetI8Const(1) }, "DispatchRaysY");
+    auto DispatchRaysZ = Builder.CreateCall(DispatchRaysIndexOpFunc, { DispatchRaysIndexOpcode, HlslOP->GetI8Const(2) }, "DispatchRaysZ");
+
+    auto WorldRayOriginX = Builder.CreateCall(WorldRayOriginOpFunc, { WorldRayOriginOpcode, HlslOP->GetI8Const(0) }, "WorldRayOriginX");
+    auto WorldRayOriginY = Builder.CreateCall(WorldRayOriginOpFunc, { WorldRayOriginOpcode, HlslOP->GetI8Const(1) }, "WorldRayOriginY");
+    auto WorldRayOriginZ = Builder.CreateCall(WorldRayOriginOpFunc, { WorldRayOriginOpcode, HlslOP->GetI8Const(2) }, "WorldRayOriginZ");
+
+    auto WorldRayDirectionX = Builder.CreateCall(WorldRayDirectionOpFunc, { WorldRayDirectionOpcode, HlslOP->GetI8Const(0) }, "WorldRayDirectionX");
+    auto WorldRayDirectionY = Builder.CreateCall(WorldRayDirectionOpFunc, { WorldRayDirectionOpcode, HlslOP->GetI8Const(1) }, "WorldRayDirectionY");
+    auto WorldRayDirectionZ = Builder.CreateCall(WorldRayDirectionOpFunc, { WorldRayDirectionOpcode, HlslOP->GetI8Const(2) }, "WorldRayDirectionZ");
+
+    auto CurrentRayT = Builder.CreateCall(CurrentRayTFunc, { CurrentRayTOpcode }, "CurrentRayT");
+    auto MinRayT = Builder.CreateCall(MinRayTFunc, { MinRayTOpcode }, "MinRayT");
+    auto RayFlags = Builder.CreateCall(RayFlagsFunc, { RayFlagsOpcode }, "RayFlags");
+
+    Function *AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx));
+    Constant *AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp);
+    Constant *AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add);
+
+    Function *UMinOpFunc = HlslOP->GetOpFunc(OP::OpCode::UMin, Type::getInt32Ty(Ctx));
+    Constant *UMinOpCode = HlslOP->GetU32Const((unsigned)OP::OpCode::UMin);
+
+    Function *StoreFuncFloat = HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getFloatTy(Ctx));
+    Function *StoreFuncInt = HlslOP->GetOpFunc(OP::OpCode::BufferStore, Type::getInt32Ty(Ctx));
+    Constant *StoreOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::BufferStore);
+
+    Constant *WriteMask_XYZW = HlslOP->GetI8Const(15);
+    Constant *WriteMask_X = HlslOP->GetI8Const(1);
+    Constant *ShaderKindAsConstant = HlslOP->GetU32Const((uint32_t)ShaderKind);
+    Constant *MaxEntryIndexAsConstant = HlslOP->GetU32Const(MaxEntryIndex);
+    Constant *Zero32Arg = HlslOP->GetU32Const(0);
+    Constant *One32Arg = HlslOP->GetU32Const(1);
+    UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx));
+
+    // Firstly we read this invocation's index within the invocations log buffer, and
+    // atomically increment it for the next invocation
+    auto *EntryIndex = Builder.CreateCall(
+        AtomicOpFunc,
+        {
+            AtomicBinOpcode,   // i32, ; opcode
+            HandleForCountUAV, // %dx.types.Handle, ; resource handle
+            AtomicAdd,         // i32, ; binary operation code
+            Zero32Arg,         // i32, ; coordinate c0: byte offset
+            UndefArg,          // i32, ; coordinate c1 (unused)
+            UndefArg,          // i32, ; coordinate c2 (unused)
+            One32Arg           // i32); increment value
+        },
+        "EntryIndexResult");
+
+    // Clamp the index so that we don't write off the end of the UAV. If we clamp, then it's up to PIX to replay the work
+    // again with a larger log buffer.
+    auto* EntryIndexClamped = Builder.CreateCall(UMinOpFunc, { UMinOpCode, EntryIndex, MaxEntryIndexAsConstant });
+
+    const auto numBytesPerEntry = 4 + (3 * 4) + (3 * 4) + (3 * 4) + 4 + 4 + 4; // See number of bytes we store per shader invocation below
+
+    auto EntryOffset = Builder.CreateMul(EntryIndexClamped, HlslOP->GetU32Const(numBytesPerEntry), "EntryOffset");
+    auto EntryOffsetPlus16 = Builder.CreateAdd(EntryOffset, HlslOP->GetU32Const(16), "EntryOffsetPlus16");
+    auto EntryOffsetPlus32 = Builder.CreateAdd(EntryOffset, HlslOP->GetU32Const(32), "EntryOffsetPlus32");
+    auto EntryOffsetPlus48 = Builder.CreateAdd(EntryOffset, HlslOP->GetU32Const(48), "EntryOffsetPlus48");
+
+    // Then we start storing the invocation's info into the main UAV buffer
+    (void)Builder.CreateCall(
+        StoreFuncInt,
+        {
+            StoreOpcode,          // i32, ; opcode
+            HandleForUAV,         // %dx.types.Handle, ; resource handle
+            EntryOffset,          // i32, ; coordinate c0: byte offset
+            UndefArg,             // i32, ; coordinate c1 (unused)
+            ShaderKindAsConstant, // i32, ; value v0
+            DispatchRaysX,        // i32, ; value v1
+            DispatchRaysY,        // i32, ; value v2
+            DispatchRaysZ,        // i32, ; value v3
+            WriteMask_XYZW        // i8 ;
+        });
+
+    (void)Builder.CreateCall(
+        StoreFuncFloat,
+        {
+            StoreOpcode,        // i32, ; opcode
+            HandleForUAV,       // %dx.types.Handle, ; resource handle
+            EntryOffsetPlus16,  // i32, ; coordinate c0: byte offset
+            UndefArg,           // i32, ; coordinate c1 (unused)
+            WorldRayOriginX,    // f32, ; value v0
+            WorldRayOriginY,    // f32, ; value v1
+            WorldRayOriginZ,    // f32, ; value v2
+            WorldRayDirectionX, // f32, ; value v3
+            WriteMask_XYZW      // i8 ;
+        });
+
+    (void)Builder.CreateCall(
+        StoreFuncFloat,
+        {
+            StoreOpcode,        // i32, ; opcode
+            HandleForUAV,       // %dx.types.Handle, ; resource handle
+            EntryOffsetPlus32,  // i32, ; coordinate c0: byte offset
+            UndefArg,           // i32, ; coordinate c1 (unused)
+            WorldRayDirectionY, // f32, ; value v0
+            WorldRayDirectionZ, // f32, ; value v1
+            MinRayT,            // f32, ; value v2
+            CurrentRayT,        // f32, ; value v3
+            WriteMask_XYZW      // i8 ;
+        });
+
+    (void)Builder.CreateCall(
+        StoreFuncInt,
+        {
+            StoreOpcode,       // i32, ; opcode
+            HandleForUAV,      // %dx.types.Handle, ; resource handle
+            EntryOffsetPlus48, // i32, ; coordinate c0: byte offset
+            UndefArg,          // i32, ; coordinate c1 (unused)
+            RayFlags,          // i32, ; value v0
+            UndefArg,          // i32, ; value v1
+            UndefArg,          // i32, ; value v2
+            UndefArg,          // i32, ; value v3
+            WriteMask_X        // i8 ;
+        });
+  }
+
+  return true;
+}
+
+char DxilPIXDXRInvocationsLog::ID = 0;
+
+ModulePass *llvm::createDxilPIXDXRInvocationsLogPass() {
+  return new DxilPIXDXRInvocationsLog();
+}
+
+INITIALIZE_PASS(DxilPIXDXRInvocationsLog, "hlsl-dxil-pix-dxr-invocations-log",
+                "HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV",
+                false, false)
diff --git a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
index 21e2fe26c..bc5e83f62 100644
--- a/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
+++ b/lib/DxilPIXPasses/DxilShaderAccessTracking.cpp
@@ -900,7 +900,7 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
         FOS << "ShouldAssumeDsvAccess";
       }
     }
-    int uavRegId = 0;
+
     for (auto * F : instrumentableFunctions) {
       DXIL::ShaderKind shaderKind = DXIL::ShaderKind::Invalid;
       if (!DM.HasDxilFunctionProps(F)) {
@@ -917,7 +917,7 @@ bool DxilShaderAccessTracking::runOnModule(Module &M) {
       IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt());
 
       m_FunctionToUAVHandle[F] = PIXPassHelpers::CreateUAV(
-          DM, Builder, uavRegId++, "PIX_CountUAV_Handle");
+          DM, Builder, 0u, "PIX_CountUAV_Handle");
       OP *HlslOP = DM.GetOP();
       for (int accessStyle = static_cast<int>(ResourceAccessStyle::None);
            accessStyle < static_cast<int>(ResourceAccessStyle::EndOfEnum);
diff --git a/lib/DxilPIXPasses/PixPassHelpers.cpp b/lib/DxilPIXPasses/PixPassHelpers.cpp
index fbc80ddb0..73201cf2d 100644
--- a/lib/DxilPIXPasses/PixPassHelpers.cpp
+++ b/lib/DxilPIXPasses/PixPassHelpers.cpp
@@ -273,7 +273,7 @@ llvm::CallInst *CreateUAV(DxilModule &DM, IRBuilder<> &Builder,
 
   auto const *shaderModel = DM.GetShaderModel();
   if (shaderModel->IsLib()) {
-    auto *Global = DM.GetModule()->getOrInsertGlobal("PIXUAV", UAVStructTy);
+    auto *Global = DM.GetModule()->getOrInsertGlobal(("PIXUAV" + std::to_string(registerId)).c_str(), UAVStructTy);
     GlobalVariable *NewGV = cast<GlobalVariable>(Global);
     NewGV->setConstant(true);
     NewGV->setLinkage(GlobalValue::ExternalLinkage);
@@ -293,7 +293,7 @@ llvm::CallInst *CreateUAV(DxilModule &DM, IRBuilder<> &Builder,
   pUAV->SetGloballyCoherent(false);
   pUAV->SetHasCounter(false);
   pUAV->SetCompType(CompType::getI32());
-  pUAV->SetLowerBound(0);
+  pUAV->SetLowerBound(registerId);
   pUAV->SetRangeSize(1);
   pUAV->SetKind(DXIL::ResourceKind::RawBuffer);
 
diff --git a/tools/clang/test/HLSLFileCheck/pix/LibAccessTracking.hlsl b/tools/clang/test/HLSLFileCheck/pix/LibAccessTracking.hlsl
index 939d4eafd..2588b4ee2 100644
--- a/tools/clang/test/HLSLFileCheck/pix/LibAccessTracking.hlsl
+++ b/tools/clang/test/HLSLFileCheck/pix/LibAccessTracking.hlsl
@@ -1,7 +1,7 @@
 // RUN: %dxc -EClosestHit -Tlib_6_5 %s | %opt -S -hlsl-dxil-pix-shader-access-instrumentation,config=U0:0:10i0;U0:1:2i0;.0;0;0. | %FileCheck %s
 
 // Check we added the UAV:
-// CHECK: @PIXUAV = external constant %struct.RWByteAddressBuffer, align 4
+// CHECK: @PIXUAV0 = external constant %struct.RWByteAddressBuffer, align 4
 // CHECK: load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @PIXUAV
 // CHECK: call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer
 //
diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp
index bb81c2dd2..f50484ad0 100644
--- a/tools/clang/unittests/HLSL/PixTest.cpp
+++ b/tools/clang/unittests/HLSL/PixTest.cpp
@@ -245,6 +245,7 @@ public:
   TEST_METHOD(RootSignatureUpgrade_Annotation)
 
   TEST_METHOD(SymbolManager_Embedded2DArray)
+  TEST_METHOD(DxilPIXDXRInvocationsLog_SanityTest)
 
   dxc::DxcDllSupport m_dllSupport;
   VersionSupportInfo m_ver;
@@ -1172,6 +1173,7 @@ static std::string ToString(std::wstring from)
 
   std::string RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob * blob);
   CComPtr<IDxcBlob> RunDxilPIXMeshShaderOutputPass(IDxcBlob* blob);
+  CComPtr<IDxcBlob> RunDxilPIXDXRInvocationsLog(IDxcBlob* blob);
   void CompileAndRunAnnotationAndGetDebugPart(
       dxc::DxcDllSupport &dllSupport, const char *source, const wchar_t *profile,
       IDxcBlob **ppDebugPart, std::vector<const wchar_t *> extraArgs = {});
@@ -3087,6 +3089,28 @@ CComPtr<IDxcBlob> PixTest::RunDxilPIXMeshShaderOutputPass(IDxcBlob *blob) {
   return pOptimizedModule;
 }
 
+CComPtr<IDxcBlob> PixTest::RunDxilPIXDXRInvocationsLog(IDxcBlob* blob) {
+
+  CComPtr<IDxcBlob> dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob);
+  CComPtr<IDxcOptimizer> pOptimizer;
+  VERIFY_SUCCEEDED(
+      m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer));
+  std::vector<LPCWSTR> Options;
+  Options.push_back(L"-hlsl-dxil-pix-dxr-invocations-log,maxNumEntriesInLog=24");
+
+  CComPtr<IDxcBlob> pOptimizedModule;
+  CComPtr<IDxcBlobEncoding> pText;
+  VERIFY_SUCCEEDED(pOptimizer->RunOptimizer(
+      dxil, Options.data(), Options.size(), &pOptimizedModule, &pText));
+
+  std::string outputText;
+  if (pText->GetBufferSize() != 0) {
+    outputText = reinterpret_cast<const char *>(pText->GetBufferPointer());
+  }
+
+  return pOptimizedModule;
+}
+
 std::string PixTest::RunDxilPIXAddTidToAmplificationShaderPayloadPass(IDxcBlob *blob) {
 
   CComPtr<IDxcBlob> dxil = FindModule(DFCC_ShaderDebugInfoDXIL, blob);
@@ -5197,4 +5221,38 @@ void ASMain()
   ValidateStructMember(2, L"bigOne", 24*8);
 }
 
+TEST_F(PixTest, DxilPIXDXRInvocationsLog_SanityTest) {
+
+  const char* source = R"x(
+struct MyPayload
+{
+    float4 color;
+};
+
+[shader("raygeneration")]
+void MyRayGen()
+{
+}
+
+[shader("closesthit")]
+void MyClosestHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr)
+{
+}
+
+[shader("anyhit")]
+void MyAnyHit(inout MyPayload payload, in BuiltInTriangleIntersectionAttributes attr)
+{
+}
+
+[shader("miss")]
+void MyMiss(inout MyPayload payload)
+{
+}
+
+)x";
+
+  auto compiledLib = Compile(source, L"lib_6_6", {});
+  RunDxilPIXDXRInvocationsLog(compiledLib);
+}
+
 #endif
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 3cf8d8576..e14070da2 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -2142,6 +2142,8 @@ class db_dxil(object):
         add_pass('hlsl-dxil-PIX-add-tid-to-as-payload', 'DxilPIXAddTidToAmplificationShaderPayload', 'HLSL DXIL Add flat thread id to payload from AS to MS', [
             {'n':'dispatchArgY','t':'int','c':1},
             {'n':'dispatchArgZ','t':'int','c':1}])
+        add_pass('hlsl-dxil-pix-dxr-invocations-log', 'DxilPIXDXRInvocationsLog', 'HLSL DXIL Logs all non-RayGen DXR 1.0 invocations into a UAV', [
+            {'n':'maxNumEntriesInLog','t':'int','c':1}])
 
         category_lib="dxil_gen"
 