From 47226d04224280a11a053946ce8459995212c33f Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 25 Jul 2017 16:08:31 -0700 Subject: [PATCH] PIX: Pixel count/cost pass (#457) The pass implements functionality for PIX for the "pixel cost", "depth complexity" and "overdraw" visualizers. You can probably infer what the pass does from the names "overdraw" and "depth complexity": For each pixel rendered it increments a corresponding counter in a UAV of a buffer that is the same size as the render target. The "pixel cost" pass does the same thing, only the increment is a weight value calculated from the total cost of the draw call, as derived from PIX's GPU-side profiling system. --- include/dxc/HLSL/DxilGenerationPass.h | 2 + lib/HLSL/CMakeLists.txt | 1 + lib/HLSL/DxcOptimizer.cpp | 9 + lib/HLSL/DxilAddPixelHitInstrumentation.cpp | 283 ++++++++++++++++++ lib/HLSL/DxilModule.cpp | 6 +- tools/clang/test/HLSL/pix/pixelCounter.hlsl | 24 ++ .../HLSL/pix/pixelCounterAddPixelCost.hlsl | 16 + .../test/HLSL/pix/pixelCounterEarlyZ.hlsl | 18 ++ .../pix/pixelCounterInappropriateEarlyZ.hlsl | 13 + .../HLSL/pix/pixelCounterNoSvPosition.hlsl | 14 + tools/clang/unittests/HLSL/CompilerTest.cpp | 26 ++ utils/hct/hctdb.py | 5 + 12 files changed, 414 insertions(+), 3 deletions(-) create mode 100644 lib/HLSL/DxilAddPixelHitInstrumentation.cpp create mode 100644 tools/clang/test/HLSL/pix/pixelCounter.hlsl create mode 100644 tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl create mode 100644 tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl create mode 100644 tools/clang/test/HLSL/pix/pixelCounterInappropriateEarlyZ.hlsl create mode 100644 tools/clang/test/HLSL/pix/pixelCounterNoSvPosition.hlsl diff --git a/include/dxc/HLSL/DxilGenerationPass.h b/include/dxc/HLSL/DxilGenerationPass.h index c22c34431..8278dd120 100644 --- a/include/dxc/HLSL/DxilGenerationPass.h +++ b/include/dxc/HLSL/DxilGenerationPass.h @@ -52,6 +52,7 @@ ModulePass *createDxilLegalizeStaticResourceUsePass(); ModulePass *createDxilLegalizeEvalOperationsPass(); FunctionPass *createDxilLegalizeSampleOffsetPass(); FunctionPass *createSimplifyInstPass(); +ModulePass *createDxilAddPixelHitInstrumentationPass(); ModulePass *createDxilOutputColorBecomesConstantPass(); ModulePass *createDxilRemoveDiscardsPass(); ModulePass *createDxilReduceMSAAToSingleSamplePass(); @@ -71,6 +72,7 @@ void initializeDxilLegalizeStaticResourceUsePassPass(llvm::PassRegistry&); void initializeDxilLegalizeEvalOperationsPass(llvm::PassRegistry&); void initializeDxilLegalizeSampleOffsetPassPass(llvm::PassRegistry&); void initializeSimplifyInstPass(llvm::PassRegistry&); +void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry&); void initializeDxilOutputColorBecomesConstantPass(llvm::PassRegistry&); void initializeDxilRemoveDiscardsPass(llvm::PassRegistry&); void initializeDxilReduceMSAAToSingleSamplePass(llvm::PassRegistry&); diff --git a/lib/HLSL/CMakeLists.txt b/lib/HLSL/CMakeLists.txt index 6a6426e31..4593280e7 100644 --- a/lib/HLSL/CMakeLists.txt +++ b/lib/HLSL/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMHLSL ComputeViewIdState.cpp ControlDependence.cpp + DxilAddPixelHitInstrumentation.cpp DxilCBuffer.cpp DxilCompType.cpp DxilCondenseResources.cpp diff --git a/lib/HLSL/DxcOptimizer.cpp b/lib/HLSL/DxcOptimizer.cpp index 202dbb9bf..d6cd5f70f 100644 --- a/lib/HLSL/DxcOptimizer.cpp +++ b/lib/HLSL/DxcOptimizer.cpp @@ -82,6 +82,7 @@ HRESULT SetupRegistryPassForHLSL() { initializeDCEPass(Registry); initializeDSEPass(Registry); initializeDeadInstEliminationPass(Registry); + initializeDxilAddPixelHitInstrumentationPass(Registry); initializeDxilCondenseResourcesPass(Registry); initializeDxilEliminateOutputDynamicIndexingPass(Registry); initializeDxilEmitMetadataPass(Registry); @@ -170,6 +171,7 @@ static ArrayRef GetPassArgNames(LPCSTR passName) { static const LPCSTR AlwaysInlinerArgs[] = { "InsertLifetime", "InlineThreshold" }; static const LPCSTR ArgPromotionArgs[] = { "maxElements" }; static const LPCSTR CFGSimplifyPassArgs[] = { "Threshold", "Ftor", "bonus-inst-threshold" }; + static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "force-early-z", "add-pixel-cost", "rt-width", "num-pixels" }; static const LPCSTR DxilGenerationPassArgs[] = { "NotOptimized" }; static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "mod-mode", "constant-red", "constant-green", "constant-blue", "constant-alpha" }; static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "ReplaceAllVectors" }; @@ -200,6 +202,7 @@ static ArrayRef GetPassArgNames(LPCSTR passName) { if (strcmp(passName, "always-inline") == 0) return ArrayRef(AlwaysInlinerArgs, _countof(AlwaysInlinerArgs)); if (strcmp(passName, "argpromotion") == 0) return ArrayRef(ArgPromotionArgs, _countof(ArgPromotionArgs)); if (strcmp(passName, "simplifycfg") == 0) return ArrayRef(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs)); + if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs)); if (strcmp(passName, "dxilgen") == 0) return ArrayRef(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs)); if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs)); if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs)); @@ -237,6 +240,7 @@ static ArrayRef GetPassArgDescriptions(LPCSTR passName) { static const LPCSTR AlwaysInlinerArgs[] = { "Insert @llvm.lifetime intrinsics", "Insert @llvm.lifetime intrinsics" }; static const LPCSTR ArgPromotionArgs[] = { "None" }; static const LPCSTR CFGSimplifyPassArgs[] = { "None", "None", "Control the number of bonus instructions (default = 1)" }; + static const LPCSTR DxilAddPixelHitInstrumentationArgs[] = { "None", "None", "None", "None" }; static const LPCSTR DxilGenerationPassArgs[] = { "None" }; static const LPCSTR DxilOutputColorBecomesConstantArgs[] = { "None", "None", "None", "None", "None" }; static const LPCSTR DynamicIndexingVectorToArrayArgs[] = { "None" }; @@ -267,6 +271,7 @@ static ArrayRef GetPassArgDescriptions(LPCSTR passName) { if (strcmp(passName, "always-inline") == 0) return ArrayRef(AlwaysInlinerArgs, _countof(AlwaysInlinerArgs)); if (strcmp(passName, "argpromotion") == 0) return ArrayRef(ArgPromotionArgs, _countof(ArgPromotionArgs)); if (strcmp(passName, "simplifycfg") == 0) return ArrayRef(CFGSimplifyPassArgs, _countof(CFGSimplifyPassArgs)); + if (strcmp(passName, "hlsl-dxil-add-pixel-hit-instrmentation") == 0) return ArrayRef(DxilAddPixelHitInstrumentationArgs, _countof(DxilAddPixelHitInstrumentationArgs)); if (strcmp(passName, "dxilgen") == 0) return ArrayRef(DxilGenerationPassArgs, _countof(DxilGenerationPassArgs)); if (strcmp(passName, "hlsl-dxil-constantColor") == 0) return ArrayRef(DxilOutputColorBecomesConstantArgs, _countof(DxilOutputColorBecomesConstantArgs)); if (strcmp(passName, "dynamic-vector-to-array") == 0) return ArrayRef(DynamicIndexingVectorToArrayArgs, _countof(DynamicIndexingVectorToArrayArgs)); @@ -319,6 +324,7 @@ static bool IsPassOptionName(StringRef S) { || S.equals("TIRA") || S.equals("TLIImpl") || S.equals("Threshold") + || S.equals("add-pixel-cost") || S.equals("bonus-inst-threshold") || S.equals("constant-alpha") || S.equals("constant-blue") @@ -330,6 +336,7 @@ static bool IsPassOptionName(StringRef S) { || S.equals("enable-scoped-noalias") || S.equals("enable-tbaa") || S.equals("float2int-max-integer-bw") + || S.equals("force-early-z") || S.equals("force-ssa-updater") || S.equals("jump-threading-threshold") || S.equals("likely-branch-weight") @@ -344,10 +351,12 @@ static bool IsPassOptionName(StringRef S) { || S.equals("mod-mode") || S.equals("no-discriminators") || S.equals("noloads") + || S.equals("num-pixels") || S.equals("pragma-unroll-threshold") || S.equals("reroll-num-tolerated-failed-matches") || S.equals("rewrite-map-file") || S.equals("rotation-max-header-size") + || S.equals("rt-width") || S.equals("sample-profile-file") || S.equals("sample-profile-max-propagate-iterations") || S.equals("sroa-random-shuffle-slices") diff --git a/lib/HLSL/DxilAddPixelHitInstrumentation.cpp b/lib/HLSL/DxilAddPixelHitInstrumentation.cpp new file mode 100644 index 000000000..b7664750c --- /dev/null +++ b/lib/HLSL/DxilAddPixelHitInstrumentation.cpp @@ -0,0 +1,283 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilAddPixelHitInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to add instrumentation to determine pixel hit count and // +// cost. Used by PIX. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "dxc/HLSL/DxilGenerationPass.h" +#include "dxc/HLSL/DxilOperations.h" +#include "dxc/HLSL/DxilSignatureElement.h" +#include "dxc/HLSL/DxilModule.h" +#include "dxc/Support/Global.h" +#include "dxc/HLSL/DxilTypeSystem.h" +#include "dxc/HLSL/DxilConstants.h" +#include "dxc/HLSL/DxilInstructions.h" +#include "dxc/HLSL/DxilSpanAllocator.h" + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Local.h" +#include +#include +#include + +using namespace llvm; +using namespace hlsl; + +class DxilAddPixelHitInstrumentation : public ModulePass { + + bool ForceEarlyZ = false; + bool AddPixelCost = false; + int RTWidth = 1024; + int NumPixels = 128; + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilAddPixelHitInstrumentation() : ModulePass(ID) {} + const char *getPassName() const override { return "DXIL Constant Color Mod"; } + void applyOptions(PassOptions O) override; + bool runOnModule(Module &M) override; +}; + +void DxilAddPixelHitInstrumentation::applyOptions(PassOptions O) +{ + for (const auto & option : O) + { + if (0 == option.first.compare("force-early-z")) + { + ForceEarlyZ = atoi(option.second.data()) != 0; + } + else if (0 == option.first.compare("rt-width")) + { + RTWidth = atoi(option.second.data()); + } + else if (0 == option.first.compare("num-pixels")) + { + NumPixels = atoi(option.second.data()); + } + else if (0 == option.first.compare("add-pixel-cost")) + { + AddPixelCost = atoi(option.second.data()) != 0; + } + } +} + +bool DxilAddPixelHitInstrumentation::runOnModule(Module &M) +{ + // This pass adds instrumentation for pixel hit counting and pixel cost. + + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext & Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + // ForceEarlyZ is incompatible with the discard function (the Z has to be tested/written, and may be written before the shader even runs) + if (ForceEarlyZ) + { + if (HlslOP->GetOpFunc(DXIL::OpCode::Discard, Type::getVoidTy(Ctx))->user_empty()) + { + DM.m_ShaderFlags.SetForceEarlyDepthStencil(true); + } + } + + hlsl::DxilSignature & InputSignature = DM.GetInputSignature(); + + auto & InputElements = InputSignature.GetElements(); + + unsigned SV_Position_ID; + + auto SV_Position = std::find_if(InputElements.begin(), InputElements.end(), [](const std::unique_ptr & Element) { + return Element->GetSemantic()->GetKind() == hlsl::DXIL::SemanticKind::Position; }); + + // SV_Position, if present, has to have full mask, so we needn't worry + // about the shader having selected components that don't include x or y. + // If not present, we add it. + if ( SV_Position == InputElements.end() ) { + auto SVPosition = std::make_unique(DXIL::SigPointKind::PSIn); + SVPosition->Initialize("Position", hlsl::CompType::getF32(), hlsl::DXIL::InterpolationMode::Linear, 1, 4, 0, 0); + SVPosition->AppendSemanticIndex(0); + SVPosition->SetSigPointKind(DXIL::SigPointKind::PSIn); + SVPosition->SetKind(hlsl::DXIL::SemanticKind::Position); + + auto index = InputSignature.AppendElement(std::move(SVPosition)); + SV_Position_ID = InputElements[index]->GetID(); + } + else { + SV_Position_ID = SV_Position->get()->GetID(); + } + + auto EntryPointFunction = DM.GetEntryFunction(); + + auto & EntryBlock = EntryPointFunction->getEntryBlock(); + bool HaveInsertedUAV = false; + + CallInst *HandleForUAV; + + // todo: is it a reasonable assumption that there will be a "Ret" in the entry block, and that these are the only + // points from which the shader can exit (except for a pixel-kill?) + auto & Instructions = EntryBlock.getInstList(); + auto It = Instructions.begin(); + while(It != Instructions.end()) { + auto ThisInstruction = It++; + LlvmInst_Ret Ret(ThisInstruction); + if (Ret) { + // Check that there is at least one instruction preceding the Ret (no need to instrument it if there isn't) + if (ThisInstruction->getPrevNode() != nullptr) { + + // Start adding instructions right before the Ret: + IRBuilder<> Builder(ThisInstruction); + + if (!HaveInsertedUAV) { + + // Set up a UAV with structure of a single int + SmallVector Elements{ Type::getInt32Ty(Ctx) }; + llvm::StructType *UAVStructTy = llvm::StructType::create(Elements, "PIX_CountUAV_Type"); + std::unique_ptr pUAV = llvm::make_unique(); + pUAV->SetGlobalName("PIX_CountUAVName"); + pUAV->SetGlobalSymbol(UndefValue::get(UAVStructTy->getPointerTo())); + pUAV->SetID(0); + pUAV->SetSpaceID((unsigned int)-2); // This is the reserved-for-tools register space + pUAV->SetSampleCount(1); + pUAV->SetGloballyCoherent(false); + pUAV->SetHasCounter(false); + pUAV->SetCompType(CompType::getI32()); + pUAV->SetLowerBound(0); + pUAV->SetRangeSize(1); + pUAV->SetKind(DXIL::ResourceKind::StructuredBuffer); + pUAV->SetElementStride(4); + + ID = DM.AddUAV(std::move(pUAV)); + + // Create handle for the newly-added UAV + Function* CreateHandleOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::CreateHandle, Type::getVoidTy(Ctx)); + Constant* CreateHandleOpcodeArg = HlslOP->GetU32Const((unsigned)DXIL::OpCode::CreateHandle); + Constant* UAVVArg = HlslOP->GetI8Const(static_cast::type>(DXIL::ResourceClass::UAV)); + Constant* MetaDataArg = HlslOP->GetU32Const(ID); // position of the metadata record in the corresponding metadata list + Constant* IndexArg = HlslOP->GetU32Const(0); // + Constant* FalseArg = HlslOP->GetI1Const(0); // non-uniform resource index: false + HandleForUAV = Builder.CreateCall(CreateHandleOpFunc, + { CreateHandleOpcodeArg, UAVVArg, MetaDataArg, IndexArg, FalseArg }, "PIX_CountUAV_Handle"); + + DM.ReEmitDxilResources(); + + HaveInsertedUAV = true; + } + + // ------------------------------------------------------------------------------------------------------------ + // Generate instructions to increment (by one) a UAV value corresponding to the pixel currently being rendered + // ------------------------------------------------------------------------------------------------------------ + + // Useful constants + Constant* Zero32Arg = HlslOP->GetU32Const(0); + Constant* Zero8Arg = HlslOP->GetI8Const(0); + Constant* One32Arg = HlslOP->GetU32Const(1); + Constant* One8Arg = HlslOP->GetI8Const(1); + UndefValue* UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + Constant* NumPixelsArg = HlslOP->GetU32Const(NumPixels); + Constant* NumPixelsMinusOneArg = HlslOP->GetU32Const(NumPixels-1); + + // Step 1: Convert SV_POSITION to UINT + Value * XAsInt; + Value * YAsInt; + { + auto LoadInputOpFunc = HlslOP->GetOpFunc(DXIL::OpCode::LoadInput, Type::getFloatTy(Ctx)); + Constant* LoadInputOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::LoadInput); + Constant* SV_Pos_ID = HlslOP->GetU32Const(SV_Position_ID); + auto XPos = Builder.CreateCall(LoadInputOpFunc, + { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, Zero8Arg /*column*/, UndefArg }, "XPos"); + auto YPos = Builder.CreateCall(LoadInputOpFunc, + { LoadInputOpcode, SV_Pos_ID, Zero32Arg /*row*/, One8Arg /*column*/, UndefArg }, "YPos"); + + XAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, XPos, Type::getInt32Ty(Ctx), "XIndex"); + YAsInt = Builder.CreateCast(Instruction::CastOps::FPToUI, YPos, Type::getInt32Ty(Ctx), "YIndex"); + } + + // Step 2: Calculate pixel index + Value * ClampedIndex; + { + Constant* RTWidthArg = HlslOP->GetI32Const(RTWidth); + auto YOffset = Builder.CreateMul(YAsInt, RTWidthArg); + auto Index = Builder.CreateAdd(XAsInt, YOffset); + + // Step 3: Clamp to size of UAV to prevent TDR if something goes wrong + auto CompareToLimit = Builder.CreateICmpUGT(Index, NumPixelsMinusOneArg); + ClampedIndex = Builder.CreateSelect(CompareToLimit, NumPixelsMinusOneArg, Index, "Clamped"); + } + + // Insert the UAV increment instruction: + Function* AtomicOpFunc = HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant* AtomicBinOpcode = HlslOP->GetU32Const((unsigned)OP::OpCode::AtomicBinOp); + Constant* AtomicAdd = HlslOP->GetU32Const((unsigned)DXIL::AtomicBinOpCode::Add); + { + (void)Builder.CreateCall(AtomicOpFunc, { + AtomicBinOpcode,// i32, ; opcode + HandleForUAV, // %dx.types.Handle, ; resource handle + AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX + ClampedIndex, // i32, ; coordinate c0: index in elements + Zero32Arg, // i32, ; coordinate c1: byte offset into element + Zero32Arg, // i32, ; coordinate c2 (unused) + One32Arg // i32); increment value + }, "UAVIncResult"); + } + + if (AddPixelCost) { + // ------------------------------------------------------------------------------------------------------------ + // Generate instructions to increment a value corresponding to the current pixel in the second half of the UAV, + // by an amount proportional to the estimated average cost of each pixel in the current draw call. + // ------------------------------------------------------------------------------------------------------------ + + // Step 1: Retrieve weight value from UAV; it will be placed after the range we're writing to + Value * Weight; + { + Function* LoadWeight = HlslOP->GetOpFunc(OP::OpCode::BufferLoad, Type::getInt32Ty(Ctx)); + Constant* LoadWeightOpcode = HlslOP->GetU32Const((unsigned)DXIL::OpCode::BufferLoad); + Constant* OffsetIntoUAV = HlslOP->GetU32Const(NumPixels * 2); + auto WeightStruct = Builder.CreateCall(LoadWeight, { + LoadWeightOpcode, // i32 opcode + HandleForUAV, // %dx.types.Handle, ; resource handle + OffsetIntoUAV, // i32 c0: index in elements into UAV + Zero32Arg // i32 c1: byte offset into struct + }, "WeightStruct"); + Weight = Builder.CreateExtractValue(WeightStruct, static_cast(0LL), "Weight"); + } + + // Step 2: Update write position ("Index") to second half of the UAV + auto OffsetIndex = Builder.CreateAdd(ClampedIndex, NumPixelsArg); + + // Step 3: Increment UAV value by the weight + (void)Builder.CreateCall(AtomicOpFunc,{ + AtomicBinOpcode, // i32, ; opcode + HandleForUAV, // %dx.types.Handle, ; resource handle + AtomicAdd, // i32, ; binary operation code : EXCHANGE, IADD, AND, OR, XOR, IMIN, IMAX, UMIN, UMAX + OffsetIndex, // i32, ; coordinate c0: index in elements + Zero32Arg, // i32, ; coordinate c1: byte offset into element + Zero32Arg, // i32, ; coordinate c2 (unused) + Weight // i32); increment value + }, "UAVIncResult2"); + } + } + } + } + + bool Modified = false; + + return Modified; +} + +char DxilAddPixelHitInstrumentation::ID = 0; + +ModulePass *llvm::createDxilAddPixelHitInstrumentationPass() { + return new DxilAddPixelHitInstrumentation(); +} + +INITIALIZE_PASS(DxilAddPixelHitInstrumentation, "hlsl-dxil-add-pixel-hit-instrmentation", "DXIL Count completed PS invocations and costs", false, false) diff --git a/lib/HLSL/DxilModule.cpp b/lib/HLSL/DxilModule.cpp index d78f0001e..28ca19011 100644 --- a/lib/HLSL/DxilModule.cpp +++ b/lib/HLSL/DxilModule.cpp @@ -1364,9 +1364,9 @@ void DxilModule::ReEmitDxilResources() { const llvm::MDOperand *pSignatures, *pResources, *pProperties; m_pMDHelper->GetDxilEntryPoint(pEntries->getOperand(0), pEntryFunc, EntryName, pSignatures, pResources, pProperties); - MDTuple *pMDSig = pSignatures? (MDTuple*)pSignatures->get():nullptr; - MDTuple *pMDProperties = pProperties ? (MDTuple*)pProperties->get():nullptr; - MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(pEntryFunc, EntryName, pMDSig, pNewResource, pMDProperties); + MDTuple *pMDSignatures = m_pMDHelper->EmitDxilSignatures(*m_EntrySignature); + MDTuple *pMDProperties = EmitDxilShaderProperties(); + MDTuple *pEntry = m_pMDHelper->EmitDxilEntryPointTuple(pEntryFunc, EntryName, pMDSignatures, pNewResource, pMDProperties); vector Entries; Entries.emplace_back(pEntry); m_pMDHelper->UpdateDxilEntryPoints(Entries); diff --git a/tools/clang/test/HLSL/pix/pixelCounter.hlsl b/tools/clang/test/HLSL/pix/pixelCounter.hlsl new file mode 100644 index 000000000..796c9e741 --- /dev/null +++ b/tools/clang/test/HLSL/pix/pixelCounter.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64 | %FileCheck %s + +// Check that the input semantic was read correctly: +// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) +// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) + +// The cast-to-int: +// CHECK: %XIndex = fptoui float %XPos to i32 +// CHECK: %YIndex = fptoui float %YPos to i32 + +// Calculation of offset: +// CHECK: %4 = mul i32 %YIndex, 16 +// CHECK: %5 = add i32 %XIndex, %4 +// CHECK: %6 = icmp ugt i32 %5, 63 + +// Clamp to UAV size: +// CHECK: %Clamped = select i1 %6, i32 63, i32 %5 + +// Check the write to the UAV was emitted: +// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1) + +float4 main(float4 pos : SV_Position) : SV_Target { + return pos; +} diff --git a/tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl b/tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl new file mode 100644 index 000000000..6c567cfec --- /dev/null +++ b/tools/clang/test/HLSL/pix/pixelCounterAddPixelCost.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,add-pixel-cost=1 | %FileCheck %s + +// Check the write to the UAV was emitted: +// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1) + +// Check for pixel cost instructions: +// CHECK: %WeightStruct = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %PIX_CountUAV_Handle, i32 128, i32 0) +// CHECK: %Weight = extractvalue %dx.types.ResRet.i32 %WeightStruct, 0 +// CHECK: %7 = add i32 %Clamped, 64 +// CHECK: %UAVIncResult2 = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %7, i32 0, i32 0, i32 %Weight) + + + +float4 main(float4 pos : SV_Position) : SV_Target { + return pos; +} diff --git a/tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl b/tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl new file mode 100644 index 000000000..5b5fcfe9e --- /dev/null +++ b/tools/clang/test/HLSL/pix/pixelCounterEarlyZ.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,force-early-z=1 | %FileCheck %s + +// Check the write to the UAV was emitted: +// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1) + +// Early z flag value is 8. The flags are stored in the last entry in the entry function description record. See: +// https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-properties-and-capabilities +// CHECK: !{i32 0, i64 8} +// Make sure it's the last entry: +// CHECK-NOT: !{ + +float4 main(float4 pos : SV_Position) : SV_Target { + return pos; +} + + + + diff --git a/tools/clang/test/HLSL/pix/pixelCounterInappropriateEarlyZ.hlsl b/tools/clang/test/HLSL/pix/pixelCounterInappropriateEarlyZ.hlsl new file mode 100644 index 000000000..e7437c965 --- /dev/null +++ b/tools/clang/test/HLSL/pix/pixelCounterInappropriateEarlyZ.hlsl @@ -0,0 +1,13 @@ +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64,force-early-z=1 | %FileCheck %s + +// Check the write to the UAV was emitted: +// CHECK: %UAVIncResult = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle %PIX_CountUAV_Handle, i32 0, i32 %Clamped, i32 0, i32 0, i32 1) + +// Early z should NOT be present even though we asked for it, due to the discard instruction. That 8 (with its key value of 0) at the end should be a 0 +// CHECK-NOT: !{i32 0, i64 8} + +float4 main(float4 pos : SV_Position) : SV_Target{ + discard; + return pos; +} + diff --git a/tools/clang/test/HLSL/pix/pixelCounterNoSvPosition.hlsl b/tools/clang/test/HLSL/pix/pixelCounterNoSvPosition.hlsl new file mode 100644 index 000000000..20d8afc97 --- /dev/null +++ b/tools/clang/test/HLSL/pix/pixelCounterNoSvPosition.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -Emain -Tps_6_0 %s | %opt -S -hlsl-dxil-add-pixel-hit-instrmentation,rt-width=16,num-pixels=64 | %FileCheck %s + +// Check the read from SV_Position was added: +// CHECK: %XPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) +// CHECK: %YPos = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) + +// Check the SV_Position meta-data was added: +// CHECK: !{i32 0, !"SV_Position", i8 9, i8 3, + +float4 main() : SV_Target{ + discard; + return float4(0,0,0,0); +} + diff --git a/tools/clang/unittests/HLSL/CompilerTest.cpp b/tools/clang/unittests/HLSL/CompilerTest.cpp index 5f8100744..8fdf58b0b 100644 --- a/tools/clang/unittests/HLSL/CompilerTest.cpp +++ b/tools/clang/unittests/HLSL/CompilerTest.cpp @@ -407,6 +407,7 @@ public: TEST_METHOD(CompileBadHlslThenFail) TEST_METHOD(CompileLegacyShaderModelThenFail) TEST_METHOD(CompileWhenRecursiveAlbeitStaticTermThenFail) + TEST_METHOD(CompileWhenRecursiveThenFail) TEST_METHOD(CompileHlsl2015ThenFail) @@ -420,6 +421,11 @@ public: TEST_METHOD(PixMSAAToSample0) TEST_METHOD(PixRemoveDiscards) + TEST_METHOD(PixPixelCounter) + TEST_METHOD(PixPixelCounterEarlyZ) + TEST_METHOD(PixPixelCounterNoSvPosition) + TEST_METHOD(PixPixelCounterInappropriateEarlyZ) + TEST_METHOD(PixPixelCounterAddPixelCost) TEST_METHOD(PixConstantColor) TEST_METHOD(PixConstantColorInt) TEST_METHOD(PixConstantColorMRT) @@ -2723,6 +2729,26 @@ TEST_F(CompilerTest, PixRemoveDiscards) { CodeGenTestCheck(L"pix\\removeDiscards.hlsl"); } +TEST_F(CompilerTest, PixPixelCounter) { + CodeGenTestCheck(L"pix\\pixelCounter.hlsl"); +} + +TEST_F(CompilerTest, PixPixelCounterEarlyZ) { + CodeGenTestCheck(L"pix\\pixelCounterEarlyZ.hlsl"); +} + +TEST_F(CompilerTest, PixPixelCounterNoSvPosition) { + CodeGenTestCheck(L"pix\\pixelCounterNoSvPosition.hlsl"); +} + +TEST_F(CompilerTest, PixPixelCounterInappropriateEarlyZ) { + CodeGenTestCheck(L"pix\\pixelCounterInappropriateEarlyZ.hlsl"); +} + +TEST_F(CompilerTest, PixPixelCounterAddPixelCost) { + CodeGenTestCheck(L"pix\\pixelCounterAddPixelCost.hlsl"); +} + TEST_F(CompilerTest, PixConstantColor) { CodeGenTestCheck(L"pix\\constantcolor.hlsl"); } diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index eff57a9c7..82f8c9e17 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1268,6 +1268,11 @@ class db_dxil(object): add_pass('resource-handle', 'ResourceToHandle', 'Lower resource into handle', []) add_pass('hlsl-dxil-condense', 'DxilCondenseResources', 'DXIL Condense Resources', []) add_pass('hlsl-dxil-eliminate-output-dynamic', 'DxilEliminateOutputDynamicIndexing', 'DXIL eliminate ouptut dynamic indexing', []) + add_pass('hlsl-dxil-add-pixel-hit-instrmentation', 'DxilAddPixelHitInstrumentation', 'DXIL Count completed PS invocations and costs', [ + {'n':'force-early-z','t':'int','c':1}, + {'n':'add-pixel-cost','t':'int','c':1}, + {'n':'rt-width','t':'int','c':1}, + {'n':'num-pixels','t':'int','c':1}]) add_pass('hlsl-dxil-constantColor', 'DxilOutputColorBecomesConstant', 'DXIL Constant Color Mod', [ {'n':'mod-mode','t':'int','c':1}, {'n':'constant-red','t':'float','c':1},