diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 0b1e51ba0..8520fed06 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -531,6 +531,13 @@ if (UNIX AND append("-fcolor-diagnostics" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() +# HLSL Change Starts +# Enable -fms-extensions for clang to use MS uuid extensions for COM. +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + append("-fms-extensions -Wno-language-extension-token" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() +# HLSL Change Ends + # Add flags for add_dead_strip(). # FIXME: With MSVS, consider compiling with /Gy and linking with /OPT:REF? # But MinSizeRel seems to add that automatically, so maybe disable these diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 884e424ec..6f1231259 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -3104,14 +3104,14 @@ Callable Stage Mesh and Amplification Shaders ------------------------------ -DirectX adds 2 new shader stages for using MeshShading pipeline namely Mesh and Amplification. -Amplification shaders corresponds to Task Shaders in Vulkan. - +| DirectX adds 2 new shader stages for using the MeshShading pipeline, namely Mesh and Amplification. +| Amplification shaders correspond to Task Shaders in Vulkan. +| | Refer to following HLSL and SPIR-V specs for details: | https://docs.microsoft.com/ | https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc - -This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. +| +| This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan. Entry Point Attributes ~~~~~~~~~~~~~~~~~~~~~~ @@ -3120,18 +3120,19 @@ shaders and are translated to SPIR-V execution modes according to the table belo ..
table:: Mapping from HLSL attribute to SPIR-V execution mode -+--------------------+----------------+-------------------------+ -| HLSL Attribute | Value | SPIR-V Execution Mode | -+====================+================+=========================+ -| | ``point`` | ``OutputPoints`` | -| +----------------+-------------------------+ -| ``outputtopology`` | ``line`` | ``OutputLinesNV`` | -| (Mesh shader) +----------------+-------------------------+ -| | ``triangle`` | ``OutputTrianglesNV`` | -+--------------------+----------------+-------------------------+ -| ``numthreads`` | ``X, Y, Z`` | ``LocalSize X, Y, Z`` | -| | (X*Y*Z <= 128) | | -+--------------------+----------------+-------------------------+ ++-------------------+--------------------+-------------------------+ +| HLSL Attribute | Value | SPIR-V Execution Mode | ++===================+====================+=========================+ +|``outputtopology`` | ``point`` | ``OutputPoints`` | +| +--------------------+-------------------------+ +|``(Mesh shader)`` | ``line`` | ``OutputLinesNV`` | +| +--------------------+-------------------------+ +| | ``triangle`` | ``OutputTrianglesNV`` | ++-------------------+--------------------+-------------------------+ +| ``numthreads`` | ``X, Y, Z`` | ``LocalSize X, Y, Z`` | +| | | | +| | ``(X*Y*Z <= 128)`` | | ++-------------------+--------------------+-------------------------+ Intrinsics ~~~~~~~~~~ @@ -3140,24 +3141,29 @@ and are translated to SPIR-V intrinsics according to the table below: .. 
table:: Mapping from HLSL intrinsics to SPIR-V intrinsics -+-------------------------+--------------------+-----------------------------------------+ -| HLSL Intrinsic | Parameters | SPIR-V Intrinsic | -+=========================+====================+=========================================+ -| ``SetMeshOutputCounts`` | ``numVertices`` | ``PrimitiveCountNV numPrimitives`` | -| (Mesh shader) | ``numPrimitives`` | | -+-------------------------+--------------------+-----------------------------------------+ -| | ``ThreadX`` | | -| ``DispatchMesh`` | ``ThreadY`` | ``OpControlBarrier`` | -| (Amplification shader) | ``ThreadZ`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` | -| | ``MeshPayload`` | | -+-------------------------+--------------------+-----------------------------------------+ ++---------------------------+--------------------+-----------------------------------------+ +| HLSL Intrinsic | Parameters | SPIR-V Intrinsic | ++===========================+====================+=========================================+ +| ``SetMeshOutputCounts`` | ``numVertices`` | ``PrimitiveCountNV numPrimitives`` | +| | | | +| ``(Mesh shader)`` | ``numPrimitives`` | | ++---------------------------+--------------------+-----------------------------------------+ +| ``DispatchMesh`` | ``ThreadX`` | ``OpControlBarrier`` | +| | | | +| ``(Amplification shader)``| ``ThreadY`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` | +| | | | +| | ``ThreadZ`` | | +| | | | +| | ``MeshPayload`` | | ++---------------------------+--------------------+-----------------------------------------+ -| *For DispatchMesh intrinsic, we also emit MeshPayload as output block with PerTaskNV decoration +| Note : For ``DispatchMesh`` intrinsic, we also emit ``MeshPayload`` as output block with ``PerTaskNV`` decoration Mesh Interface Variables ~~~~~~~~~~~~~~~~~~~~~~~~ -Interface variables are defined for Mesh shaders using HLSL modifiers. 
-Following table gives high level overview of the mapping: +| Interface variables are defined for Mesh shaders using HLSL modifiers. +| Following table gives high level overview of the mapping: +| .. table:: Mapping from HLSL modifiers to SPIR-V definitions @@ -3165,9 +3171,11 @@ Following table gives high level overview of the mapping: | HLSL modifier | SPIR-V definition | +=================+=========================================================================+ | ``indices`` | Maps to SPIR-V intrinsic ``PrimitiveIndicesNV`` | +| | | | | Defines SPIR-V Execution Mode ``OutputPrimitivesNV `` | +-----------------+-------------------------------------------------------------------------+ | ``vertices`` | Maps to per-vertex out attributes | +| | | | | Defines existing SPIR-V Execution Mode ``OutputVertices `` | +-----------------+-------------------------------------------------------------------------+ | ``primitives`` | Maps to per-primitive out attributes with ``PerPrimitiveNV`` decoration | @@ -3395,6 +3403,13 @@ codegen for Vulkan: - ``-fspv-target-env=``: Specifies the target environment for this compilation. The current valid options are ``vulkan1.0`` and ``vulkan1.1``. If no target environment is provided, ``vulkan1.0`` is used as default. +- ``-fspv-flatten-resource-arrays``: Flattens arrays of textures and samplers + into individual resources, each taking one binding number. For example, an + array of 3 textures will become 3 texture resources taking 3 binding numbers. + This makes the behavior similar to DX. Without this option, you would get 1 + array object taking 1 binding number. Note that arrays of + {RW|Append|Consume}StructuredBuffers are currently not supported in the + SPIR-V backend. - ``-Wno-vk-ignored-features``: Does not emit warnings on ignored features resulting from no Vulkan support, e.g., cbuffer member initializer. 
diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index aa9e8f538..bbd80462f 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit aa9e8f538041db3055ea443080e0ccc315fa114f +Subproject commit bbd80462f5c89e9a225edabaca1215032c62e459 diff --git a/include/dxc/DXIL/DxilModule.h b/include/dxc/DXIL/DxilModule.h index 2e04c4e84..b06a6418f 100644 --- a/include/dxc/DXIL/DxilModule.h +++ b/include/dxc/DXIL/DxilModule.h @@ -225,7 +225,7 @@ public: // This funciton must be called after unused resources are removed from DxilModule bool ModuleHasMulticomponentUAVLoads(); - // Compute shader. + // Compute/Mesh/Amplification shader. void SetNumThreads(unsigned x, unsigned y, unsigned z); unsigned GetNumThreads(unsigned idx) const; diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 0eeb0e8e4..8eaece8e2 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -281,6 +281,8 @@ def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group, Fl HelpText<"Specify SPIR-V extension permitted to use">; def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">; +def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group, Flags<[CoreOption, DriverOption]>, + HelpText<"Flatten arrays of resources so each array element takes one binding number">; def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group, Flags<[CoreOption, DriverOption, HelpHidden]>, HelpText<"Do not emit warnings for ingored features resulting from no Vulkan support">; def Wno_vk_emulated_features : Joined<["-"], "Wno-vk-emulated-features">, Group, Flags<[CoreOption, DriverOption, HelpHidden]>, diff --git a/include/dxc/Support/SPIRVOptions.h b/include/dxc/Support/SPIRVOptions.h index 667195c9b..1612c9123 100644 --- 
a/include/dxc/Support/SPIRVOptions.h +++ b/include/dxc/Support/SPIRVOptions.h @@ -53,6 +53,7 @@ struct SpirvCodeGenOptions { bool useDxLayout; bool useGlLayout; bool useScalarLayout; + bool flattenResourceArrays; SpirvLayoutRule cBufferLayoutRule; SpirvLayoutRule sBufferLayoutRule; SpirvLayoutRule tBufferLayoutRule; diff --git a/include/dxc/Support/WinAdapter.h b/include/dxc/Support/WinAdapter.h index 0f394347c..b02899244 100644 --- a/include/dxc/Support/WinAdapter.h +++ b/include/dxc/Support/WinAdapter.h @@ -46,16 +46,33 @@ #define CoTaskMemFree free #define SysFreeString free -#define SysAllocStringLen(ptr, size) (wchar_t*)realloc(ptr, (size + 1)*sizeof(wchar_t)) +#define SysAllocStringLen(ptr, size) \ + (wchar_t *)realloc(ptr, (size + 1) * sizeof(wchar_t)) #define ARRAYSIZE(array) (sizeof(array) / sizeof(array[0])) #define _countof(a) (sizeof(a) / sizeof(*(a))) +// Emulate UUID support on Apple platforms and when building with GCC (which has no UUID support). +#ifdef __APPLE__ +#define __EMULATE_UUID 1 +#else // __APPLE__ +#ifdef __GNUC__ +#ifndef __clang__ +#define __EMULATE_UUID 1 +#endif // __clang__ +#endif // __GNUC__ +#endif // __APPLE__ + +#ifdef __EMULATE_UUID #define __declspec(x) +#endif // __EMULATE_UUID + #define DECLSPEC_SELECTANY +#ifdef __EMULATE_UUID #define uuid(id) +#endif // __EMULATE_UUID #define STDMETHODCALLTYPE #define STDAPI extern "C" HRESULT STDAPICALLTYPE @@ -188,7 +205,8 @@ #define OutputDebugStringA(msg) fputs(msg, stderr) #define OutputDebugFormatA(...)
fprintf(stderr, __VA_ARGS__) -#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace, BackTraceHash)\ +#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace, \ + BackTraceHash) \ backtrace(BackTrace, FramesToCapture) // Event Tracing for Windows (ETW) provides application programmers the ability @@ -413,19 +431,55 @@ typedef void *HMODULE; //===--------------------- ID Types and Macros for COM --------------------===// -struct GUID { +#ifdef __EMULATE_UUID +struct GUID +#else // __EMULATE_UUID +// These specific definitions are required by clang -fms-extensions. +typedef struct _GUID +#endif // __EMULATE_UUID +{ uint32_t Data1; uint16_t Data2; uint16_t Data3; uint8_t Data4[8]; -}; +} +#ifdef __EMULATE_UUID +; +#else // __EMULATE_UUID +GUID; +#endif // __EMULATE_UUID typedef GUID CLSID; typedef const GUID &REFGUID; -typedef const void *REFIID; typedef const GUID &REFCLSID; +#ifdef __EMULATE_UUID +typedef const void *REFIID; #define IsEqualIID(a, b) a == b #define IsEqualCLSID(a, b) !memcmp(&a, &b, sizeof(GUID)) +#else // __EMULATE_UUID +typedef GUID IID; +typedef IID *LPIID; +typedef const IID &REFIID; +inline bool IsEqualGUID(REFGUID rguid1, REFGUID rguid2) { + return !memcmp(&rguid1, &rguid2, sizeof(GUID)); +} + +inline bool operator==(REFGUID guidOne, REFGUID guidOther) { + return !!IsEqualGUID(guidOne, guidOther); +} + +inline bool operator!=(REFGUID guidOne, REFGUID guidOther) { + return !(guidOne == guidOther); +} + +inline bool IsEqualIID(REFIID riid1, REFIID riid2) { + return IsEqualGUID(riid1, riid2); +} + +inline bool IsEqualCLSID(REFCLSID rclsid1, REFCLSID rclsid2) { + return IsEqualGUID(rclsid1, rclsid2); +} +#endif // __EMULATE_UUID //===--------------------- Struct Types -----------------------------------===// @@ -503,22 +557,37 @@ enum tagSTATFLAG { //===--------------------- UUID Related Macros ----------------------------===// +#ifdef __EMULATE_UUID + // The following macros are defined to facilitate the lack of 
'uuid' on Linux. #define DECLARE_CROSS_PLATFORM_UUIDOF(T) \ public: \ static REFIID uuidof() { return static_cast(&T##_ID); } \ \ private: \ - __attribute__ ((visibility ("default"))) static const char T##_ID; + __attribute__((visibility("default"))) static const char T##_ID; -#define DEFINE_CROSS_PLATFORM_UUIDOF(T) __attribute__ ((visibility ("default"))) const char T::T##_ID = '\0'; +#define DEFINE_CROSS_PLATFORM_UUIDOF(T) \ + __attribute__((visibility("default"))) const char T::T##_ID = '\0'; #define __uuidof(T) T::uuidof() #define IID_PPV_ARGS(ppType) \ (**(ppType)).uuidof(), reinterpret_cast(ppType) +#else // __EMULATE_UUID + +#define DECLARE_CROSS_PLATFORM_UUIDOF(T) +#define DEFINE_CROSS_PLATFORM_UUIDOF(T) + +template inline void **IID_PPV_ARGS_Helper(T **pp) { + return reinterpret_cast(pp); +} +#define IID_PPV_ARGS(ppType) __uuidof(**(ppType)), IID_PPV_ARGS_Helper(ppType) + +#endif // __EMULATE_UUID + //===--------------------- COM Interfaces ---------------------------------===// -struct IUnknown { +struct __declspec(uuid("00000000-0000-0000-C000-000000000046")) IUnknown { virtual HRESULT QueryInterface(REFIID riid, void **ppvObject) = 0; virtual ULONG AddRef(); virtual ULONG Release(); @@ -533,25 +602,29 @@ private: DECLARE_CROSS_PLATFORM_UUIDOF(IUnknown) }; -struct INoMarshal : public IUnknown { +struct __declspec(uuid("ECC8691B-C1DB-4DC0-855E-65F6C551AF49")) INoMarshal + : public IUnknown { DECLARE_CROSS_PLATFORM_UUIDOF(INoMarshal) }; -struct IMalloc : public IUnknown { +struct __declspec(uuid("00000002-0000-0000-C000-000000000046")) IMalloc + : public IUnknown { virtual void *Alloc(size_t size); virtual void *Realloc(void *ptr, size_t size); virtual void Free(void *ptr); virtual HRESULT QueryInterface(REFIID riid, void **ppvObject); }; -struct ISequentialStream : public IUnknown { +struct __declspec(uuid("0C733A30-2A1C-11CE-ADE5-00AA0044773D")) + ISequentialStream : public IUnknown { virtual HRESULT Read(void *pv, ULONG cb, ULONG *pcbRead) = 0; virtual 
HRESULT Write(const void *pv, ULONG cb, ULONG *pcbWritten) = 0; DECLARE_CROSS_PLATFORM_UUIDOF(ISequentialStream) }; -struct IStream : public ISequentialStream { +struct __declspec(uuid("0000000c-0000-0000-C000-000000000046")) IStream + : public ISequentialStream { virtual HRESULT Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin, ULARGE_INTEGER *plibNewPosition) = 0; virtual HRESULT SetSize(ULARGE_INTEGER libNewSize) = 0; diff --git a/lib/DXIL/DxilModule.cpp b/lib/DXIL/DxilModule.cpp index d166aa18a..0194a025c 100644 --- a/lib/DXIL/DxilModule.cpp +++ b/lib/DXIL/DxilModule.cpp @@ -366,24 +366,30 @@ void DxilModule::CollectShaderFlagsForModule() { } void DxilModule::SetNumThreads(unsigned x, unsigned y, unsigned z) { - DXASSERT(m_DxilEntryPropsMap.size() == 1 && m_pSM->IsCS(), - "only works for CS profile"); + DXASSERT(m_DxilEntryPropsMap.size() == 1 && + (m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()), + "only works for CS/MS/AS profiles"); DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props; - DXASSERT(props.IsCS(), "Must be CS profile"); - unsigned *numThreads = props.ShaderProps.CS.numThreads; + DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind); + unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads : + props.IsMS() ? 
props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads; numThreads[0] = x; numThreads[1] = y; numThreads[2] = z; } unsigned DxilModule::GetNumThreads(unsigned idx) const { + DXASSERT(m_DxilEntryPropsMap.size() == 1 && + (m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()), + "only works for CS/MS/AS profiles"); DXASSERT(idx < 3, "Thread dimension index must be 0-2"); - if (!m_pSM->IsCS()) - return 0; - DXASSERT(m_DxilEntryPropsMap.size() == 1, "should have one entry prop"); __analysis_assume(idx < 3); + if (!(m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS())) + return 0; const DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props; - DXASSERT(props.IsCS(), "Must be CS profile"); - return props.ShaderProps.CS.numThreads[idx]; + DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind); + const unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads : + props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads; + return numThreads[idx]; } DXIL::InputPrimitive DxilModule::GetInputPrimitive() const { diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 1a32cf62a..fc7f6e5bc 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -717,6 +717,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, opts.SpirvOptions.enableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false); opts.SpirvOptions.noWarnIgnoredFeatures = Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false); opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false); + opts.SpirvOptions.flattenResourceArrays = + Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false); if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) || !handleVkShiftArgs(Args, OPT_fvk_t_shift, "t", &opts.SpirvOptions.tShift, errors) || @@ -791,6 +793,7 @@ int ReadDxcOpts(const OptTable *optionTable, 
unsigned flagsToInclude, Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) || Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) || Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) || + Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) || Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) || Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) || Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) || diff --git a/lib/HLSL/DxilValidation.cpp b/lib/HLSL/DxilValidation.cpp index 75f2b0b1a..87f926520 100644 --- a/lib/HLSL/DxilValidation.cpp +++ b/lib/HLSL/DxilValidation.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Verifier.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/MemoryBuffer.h" @@ -3870,6 +3871,12 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) { } } +static void ValidateBitcode(ValidationContext &ValCtx) { + if (llvm::verifyModule(ValCtx.M, &ValCtx.DiagStream())) { + ValCtx.EmitError(ValidationRule::BitcodeValid); + } +} + static void ValidateMetadata(ValidationContext &ValCtx) { Module *pModule = &ValCtx.M; const std::string &target = pModule->getTargetTriple(); @@ -5626,6 +5633,8 @@ ValidateDxilModule(llvm::Module *pModule, llvm::Module *pDebugModule) { ValidationContext ValCtx(*pModule, pDebugModule, *pDxilModule, DiagPrinter); + ValidateBitcode(ValCtx); + ValidateMetadata(ValCtx); ValidateShaderState(ValCtx); diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 0c6700075..3ea202e7a 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -1981,7 +1981,7 @@ Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // bool ne = val != 0; Value *notZero = Builder.CreateFCmpUNE(val, zeroVal); - notZero = Builder.CreateZExt(notZero, dstTy); + notZero = 
Builder.CreateSExt(notZero, dstTy); Value *intVal = Builder.CreateBitCast(val, dstTy); // temp = intVal & exponentMask; diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index cc54d3d04..f883fa1ce 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -3246,11 +3246,14 @@ static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder) C->removeDeadConstantUsers(); } -static void ReplaceUnboundedArrayUses(Value *V, Value *Src, IRBuilder<> &Builder) { +static void ReplaceUnboundedArrayUses(Value *V, Value *Src) { for (auto it = V->user_begin(); it != V->user_end(); ) { User *U = *(it++); if (GetElementPtrInst *GEP = dyn_cast(U)) { SmallVector idxList(GEP->idx_begin(), GEP->idx_end()); + // Must set the insert point to the GEP itself (instead of the memcpy), + // because the indices might not dominate the memcpy. + IRBuilder<> Builder(GEP); Value *NewGEP = Builder.CreateGEP(Src, idxList); GEP->replaceAllUsesWith(NewGEP); } else if (BitCastInst *BC = dyn_cast(U)) { @@ -3392,8 +3395,7 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC, } } else { DXASSERT(IsUnboundedArrayMemcpy(TyV, TySrc), "otherwise mismatched types in memcpy are not unbounded array"); - IRBuilder<> Builder(MC); - ReplaceUnboundedArrayUses(V, Src, Builder); + ReplaceUnboundedArrayUses(V, Src); } } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index b1e4a8c0c..c6f34abc9 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -716,7 +716,7 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var) { const auto *bindingAttr = var->getAttr(); const auto *counterBindingAttr = var->getAttr(); - resourceVars.emplace_back(varInstr, loc, regAttr, bindingAttr, + resourceVars.emplace_back(varInstr, var, loc, regAttr, 
bindingAttr, counterBindingAttr); if (const auto *inputAttachment = var->getAttr()) @@ -846,7 +846,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) { astDecls[varDecl] = DeclSpirvInfo(bufferVar, index++); } resourceVars.emplace_back( - bufferVar, decl->getLocation(), getResourceBinding(decl), + bufferVar, decl, decl->getLocation(), getResourceBinding(decl), decl->getAttr(), decl->getAttr()); return bufferVar; @@ -890,7 +890,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const VarDecl *decl) { // We register the VarDecl here. astDecls[decl] = DeclSpirvInfo(bufferVar); resourceVars.emplace_back( - bufferVar, decl->getLocation(), getResourceBinding(context), + bufferVar, decl, decl->getLocation(), getResourceBinding(context), decl->getAttr(), decl->getAttr()); return bufferVar; @@ -970,8 +970,8 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) { context, /*arraySize*/ 0, ContextUsageKind::Globals, "type.$Globals", "$Globals"); - resourceVars.emplace_back(globals, SourceLocation(), nullptr, nullptr, - nullptr, /*isCounterVar*/ false, + resourceVars.emplace_back(globals, /*decl*/ nullptr, SourceLocation(), + nullptr, nullptr, nullptr, /*isCounterVar*/ false, /*isGlobalsCBuffer*/ true); uint32_t index = 0; @@ -1089,7 +1089,7 @@ void DeclResultIdMapper::createCounterVar( if (!isAlias) { // Non-alias counter variables should be put in to resourceVars so that // descriptors can be allocated for them. - resourceVars.emplace_back(counterInstr, decl->getLocation(), + resourceVars.emplace_back(counterInstr, decl, decl->getLocation(), getResourceBinding(decl), decl->getAttr(), decl->getAttr(), true); @@ -1213,26 +1213,63 @@ private: /// set and binding number. class BindingSet { public: - /// Uses the given set and binding number. - void useBinding(uint32_t binding, uint32_t set) { - usedBindings[set].insert(binding); + /// Uses the given set and binding number. 
Returns false if the binding number + /// was already occupied in the set, and returns true otherwise. + bool useBinding(uint32_t binding, uint32_t set) { + bool inserted = false; + std::tie(std::ignore, inserted) = usedBindings[set].insert(binding); + return inserted; } - /// Uses the next avaiable binding number in set 0. - uint32_t useNextBinding(uint32_t set) { + /// Uses the next available binding number in |set|. If more than one binding + /// number is to be occupied, it finds the next available chunk that can fit + /// |numBindingsToUse| in the |set|. + uint32_t useNextBinding(uint32_t set, uint32_t numBindingsToUse = 1) { + uint32_t bindingNoStart = getNextBindingChunk(set, numBindingsToUse); auto &binding = usedBindings[set]; - auto &next = nextBindings[set]; - while (binding.count(next)) - ++next; - binding.insert(next); - return next++; + for (uint32_t i = 0; i < numBindingsToUse; ++i) + binding.insert(bindingNoStart + i); + return bindingNoStart; + } + + /// Returns the first available binding number in the |set| for which |n| + /// consecutive binding numbers are unused. + uint32_t getNextBindingChunk(uint32_t set, uint32_t n) { + auto &existingBindings = usedBindings[set]; + + // There were no bindings in this set. Can start at binding zero. + if (existingBindings.empty()) + return 0; + + // Check whether the chunk of |n| binding numbers can be fitted at the + // very beginning of the list (start at binding 0 in the current set). + uint32_t curBinding = *existingBindings.begin(); + if (curBinding >= n) + return 0; + + auto iter = std::next(existingBindings.begin()); + while (iter != existingBindings.end()) { + // There exists a next binding number that is used. Check to see if the + // gap between current binding number and next binding number is large + // enough to accommodate |n|.
+ uint32_t nextBinding = *iter; + if (n <= nextBinding - curBinding - 1) + return curBinding + 1; + + curBinding = nextBinding; + + // Peek at the next binding that has already been used (if any). + ++iter; + } + + // |curBinding| was the last binding that was used in this set. The next + // chunk of |n| bindings can start at |curBinding|+1. + return curBinding + 1; } private: ///< set number -> set of used binding number - llvm::DenseMap> usedBindings; - ///< set number -> next available binding number - llvm::DenseMap nextBindings; + llvm::DenseMap> usedBindings; }; } // namespace @@ -1553,11 +1590,30 @@ bool DeclResultIdMapper::decorateResourceBindings() { // Decorates the given varId of the given category with set number // setNo, binding number bindingNo. Ignores overlaps. - const auto tryToDecorate = [this, &bindingSet](SpirvVariable *var, + const auto tryToDecorate = [this, &bindingSet](const ResourceVar &var, const uint32_t setNo, const uint32_t bindingNo) { - bindingSet.useBinding(bindingNo, setNo); - spvBuilder.decorateDSetBinding(var, setNo, bindingNo); + // By default we use one binding number per resource, and an array of + // resources also gets only one binding number. However, for array of + // resources (e.g. array of textures), DX uses one binding number per array + // element. We can match this behavior via a command line option. + uint32_t numBindingsToUse = 1; + if (spirvOptions.flattenResourceArrays) + numBindingsToUse = var.getArraySize(); + + for (uint32_t i = 0; i < numBindingsToUse; ++i) { + bool success = bindingSet.useBinding(bindingNo + i, setNo); + if (!success && spirvOptions.flattenResourceArrays) { + emitError("ran into binding number conflict when assigning binding " + "number %0 in set %1", + {}) + << bindingNo << setNo; + } + } + + // No need to decorate multiple binding numbers for arrays. It will be done + // by legalization/optimization. 
+ spvBuilder.decorateDSetBinding(var.getSpirvInstr(), setNo, bindingNo); }; for (const auto &var : resourceVars) { @@ -1570,13 +1626,12 @@ bool DeclResultIdMapper::decorateResourceBindings() { else if (const auto *reg = var.getRegister()) set = reg->RegisterSpace.getValueOr(defaultSpace); - tryToDecorate(var.getSpirvInstr(), set, vkCBinding->getBinding()); + tryToDecorate(var, set, vkCBinding->getBinding()); } } else { if (const auto *vkBinding = var.getBinding()) { // Process m1 - tryToDecorate(var.getSpirvInstr(), - getVkBindingAttrSet(vkBinding, defaultSpace), + tryToDecorate(var, getVkBindingAttrSet(vkBinding, defaultSpace), vkBinding->getBinding()); } } @@ -1617,10 +1672,18 @@ bool DeclResultIdMapper::decorateResourceBindings() { llvm_unreachable("unknown register type found"); } - tryToDecorate(var.getSpirvInstr(), set, binding); + tryToDecorate(var, set, binding); } for (const auto &var : resourceVars) { + // By default we use one binding number per resource, and an array of + // resources also gets only one binding number. However, for array of + // resources (e.g. array of textures), DX uses one binding number per array + // element. We can match this behavior via a command line option. 
+ uint32_t numBindingsToUse = 1; + if (spirvOptions.flattenResourceArrays) + numBindingsToUse = var.getArraySize(); + if (var.isCounter()) { if (!var.getCounterBinding()) { // Process mX * c2 @@ -1630,15 +1693,17 @@ bool DeclResultIdMapper::decorateResourceBindings() { else if (const auto *reg = var.getRegister()) set = reg->RegisterSpace.getValueOr(defaultSpace); - spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set, - bindingSet.useNextBinding(set)); + spvBuilder.decorateDSetBinding( + var.getSpirvInstr(), set, + bindingSet.useNextBinding(set, numBindingsToUse)); } } else if (!var.getBinding()) { const auto *reg = var.getRegister(); if (reg && reg->isSpaceOnly()) { const uint32_t set = reg->RegisterSpace.getValueOr(defaultSpace); - spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set, - bindingSet.useNextBinding(set)); + spvBuilder.decorateDSetBinding( + var.getSpirvInstr(), set, + bindingSet.useNextBinding(set, numBindingsToUse)); } else if (!reg) { // Process m3 (no 'vk::binding' and no ':register' assignment) @@ -1653,7 +1718,7 @@ bool DeclResultIdMapper::decorateResourceBindings() { else { spvBuilder.decorateDSetBinding( var.getSpirvInstr(), defaultSpace, - bindingSet.useNextBinding(defaultSpace)); + bindingSet.useNextBinding(defaultSpace, numBindingsToUse)); } } } diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.h b/tools/clang/lib/SPIRV/DeclResultIdMapper.h index 1e98489c1..71af4868b 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.h +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.h @@ -111,12 +111,24 @@ private: class ResourceVar { public: - ResourceVar(SpirvVariable *var, SourceLocation loc, + ResourceVar(SpirvVariable *var, const Decl *decl, SourceLocation loc, const hlsl::RegisterAssignment *r, const VKBindingAttr *b, const VKCounterBindingAttr *cb, bool counter = false, bool globalsBuffer = false) : variable(var), srcLoc(loc), reg(r), binding(b), counterBinding(cb), - isCounterVar(counter), isGlobalsCBuffer(globalsBuffer) {} + 
isCounterVar(counter), isGlobalsCBuffer(globalsBuffer), arraySize(1) { + if (decl) { + if (const ValueDecl *valueDecl = dyn_cast(decl)) { + const QualType type = valueDecl->getType(); + if (!type.isNull() && type->isConstantArrayType()) { + if (auto constArrayType = dyn_cast(type)) { + arraySize = + static_cast(constArrayType->getSize().getZExtValue()); + } + } + } + } + } SpirvVariable *getSpirvInstr() const { return variable; } SourceLocation getSourceLocation() const { return srcLoc; } @@ -127,6 +139,7 @@ public: const VKCounterBindingAttr *getCounterBinding() const { return counterBinding; } + uint32_t getArraySize() const { return arraySize; } private: SpirvVariable *variable; ///< The variable @@ -136,6 +149,7 @@ private: const VKCounterBindingAttr *counterBinding; ///< Vulkan counter binding bool isCounterVar; ///< Couter variable or not bool isGlobalsCBuffer; ///< $Globals cbuffer or not + uint32_t arraySize; ///< Size if resource is an array }; /// A (instruction-pointer, is-alias-or-not) pair for counter variables @@ -297,6 +311,11 @@ public: SpirvVariable *createRayTracingNVStageVar(spv::StorageClass sc, const VarDecl *decl); + /// \brief Creates the taskNV stage variables for payload struct variable + /// and returns true on success. SPIR-V instructions will also be generated + /// to load/store the contents from/to *value. payloadMemOffset is incremented + /// based on payload struct member size, alignment and offset, and SPIR-V + /// decorations PerTaskNV and Offset are assigned to each member. 
bool createPayloadStageVars(const hlsl::SigPoint *sigPoint, spv::StorageClass sc, const NamedDecl *decl, bool asInput, QualType type, diff --git a/tools/clang/lib/SPIRV/GlPerVertex.cpp b/tools/clang/lib/SPIRV/GlPerVertex.cpp index 3ed949f64..24833527a 100644 --- a/tools/clang/lib/SPIRV/GlPerVertex.cpp +++ b/tools/clang/lib/SPIRV/GlPerVertex.cpp @@ -363,7 +363,7 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind, SpirvInstruction *vecComponent, SourceLocation loc) { assert(value); - // invocationId should only be used for HSPCOut. + // invocationId should only be used for HSCPOut or MSOut. assert(invocationId.hasValue() ? (sigPointKind == hlsl::SigPoint::Kind::HSCPOut || sigPointKind == hlsl::SigPoint::Kind::MSOut) @@ -655,7 +655,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind, // The interesting shader stage is HS. We need the InvocationID to write // out the value to the correct array element. SpirvInstruction *offset = nullptr; - QualType type; + QualType type = {}; bool isClip = false; switch (semanticKind) { case hlsl::Semantic::Kind::ClipDistance: { @@ -686,7 +686,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind, return false; } if (vecComponent) { - QualType elemType; + QualType elemType = {}; if (!isVectorType(type, &elemType)) { assert(false && "expected vector type"); } diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index e2ba32db6..9e13a8ca3 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -164,7 +164,7 @@ bool spirvToolsLegalize(spv_target_env env, std::vector *module, } bool spirvToolsOptimize(spv_target_env env, std::vector *module, - const llvm::SmallVector &flags, + clang::spirv::SpirvCodeGenOptions &spirvOptions, std::string *messages) { spvtools::Optimizer optimizer(env); @@ -176,14 +176,16 @@ bool spirvToolsOptimize(spv_target_env env, std::vector *module, spvtools::OptimizerOptions options; 
options.set_run_validator(false); - if (flags.empty()) { + if (spirvOptions.optConfig.empty()) { optimizer.RegisterPerformancePasses(); + if (spirvOptions.flattenResourceArrays) + optimizer.RegisterPass(spvtools::CreateDescriptorScalarReplacementPass()); optimizer.RegisterPass(spvtools::CreateCompactIdsPass()); } else { // Command line options use llvm::SmallVector and llvm::StringRef, whereas // SPIR-V optimizer uses std::vector and std::string. std::vector stdFlags; - for (const auto &f : flags) + for (const auto &f : spirvOptions.optConfig) stdFlags.push_back(f.str()); if (!optimizer.RegisterPassesFromFlags(stdFlags)) return false; @@ -662,8 +664,7 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { // Run optimization passes if (theCompilerInstance.getCodeGenOpts().OptimizationLevel > 0) { std::string messages; - if (!spirvToolsOptimize(targetEnv, &m, spirvOptions.optConfig, - &messages)) { + if (!spirvToolsOptimize(targetEnv, &m, spirvOptions, &messages)) { emitFatalError("failed to optimize SPIR-V: %0", {}) << messages; emitNote("please file a bug report on " "https://github.com/Microsoft/DirectXShaderCompiler/issues " diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 57cb33c58..6babc2ff3 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -309,8 +309,10 @@ private: SpirvInstruction *initValue, SourceLocation loc); - /// Collects all indices from consecutive MemberExprs - /// TODO: Update method description here. + /// Collects all indices from consecutive MemberExprs, ArraySubscriptExprs and + /// CXXOperatorCallExprs. Also special handles all mesh shader out attributes + /// to return the entire expression in order for caller to extract the member + /// expression. 
const Expr * collectArrayStructIndices(const Expr *expr, bool rawIndex, llvm::SmallVectorImpl *rawIndices, diff --git a/tools/clang/test/CodeGenHLSL/batch/expressions/intrinsics/frexp.hlsl b/tools/clang/test/CodeGenHLSL/batch/expressions/intrinsics/frexp.hlsl new file mode 100644 index 000000000..fe66ecfa2 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/batch/expressions/intrinsics/frexp.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s + +// Make sure frexp generate code pattern. +// CHECK:bitcast float {{.*}} to i32 +// CHECK:and i32 {{.*}}, 2139095040 +// CHECK:add {{.*}}, -1056964608 +// CHECK:ashr {{.*}}, 23 +// CHECK:sitofp +// CHECK:and i32 {{.*}}, 8388607 +// CHECK:or i32 {{.*}}, 1056964608 +// CHECK:fadd + +float main(float a:A) : SV_Target { + float b; + float c = frexp ( a , b ); + + return b+c; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/batch/passes/sroa_hlsl/memcpy_dom.hlsl b/tools/clang/test/CodeGenHLSL/batch/passes/sroa_hlsl/memcpy_dom.hlsl new file mode 100644 index 000000000..76f416768 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/batch/passes/sroa_hlsl/memcpy_dom.hlsl @@ -0,0 +1,17 @@ +// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s + +// Regression test for a validation error, where parameter SROA +// would generate GEPs before the indices it uses + +// CHECK: @main + +Texture2D tex0[10] : register(t0); + +float4 f(Texture2D textures[], unsigned int idx) { + return textures[idx].Load(0); +} + +[ RootSignature("DescriptorTable(SRV(t0, numDescriptors=10))") ] +float4 main() : SV_Target { + return f(tex0, 1); +} diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.buffer.mesh.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.buffer.mesh.hlsl new file mode 100644 index 000000000..55694fef3 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.buffer.mesh.hlsl @@ -0,0 +1,202 @@ +// Run: %dxc -T ms_6_5 -E main + +// CHECK: OpCapability MeshShadingNV +// CHECK: OpExtension 
"SPV_NV_mesh_shader" +// CHECK: OpEntryPoint MeshNV %main "main" + +// CHECK: OpName %UserVertex "UserVertex" +struct UserVertex { +// CHECK: OpMemberName %UserVertex 0 "position" +// CHECK: OpMemberName %UserVertex 1 "texcoord" +// CHECK: OpMemberName %UserVertex 2 "color" + float3 position; + float2 texcoord; + float3 color; +}; + +// CHECK: OpName %Mesh "Mesh" +struct Mesh { +// CHECK: OpMemberName %Mesh 0 "firstSubmesh" +// CHECK: OpMemberName %Mesh 1 "submeshCount" +// CHECK: OpMemberName %Mesh 2 "dummy" + uint firstSubmesh; + uint submeshCount; + uint dummy[2]; +}; + +// CHECK: OpName %SubMesh "SubMesh" +struct SubMesh { +// CHECK: OpMemberName %SubMesh 0 "vertexCount" +// CHECK: OpMemberName %SubMesh 1 "vertexOffset" +// CHECK: OpMemberName %SubMesh 2 "primitiveCount" +// CHECK: OpMemberName %SubMesh 3 "indexOffset" +// CHECK: OpMemberName %SubMesh 4 "boundingBox" + uint vertexCount; + uint vertexOffset; + uint primitiveCount; + uint indexOffset; + float4 boundingBox[8]; +}; + +// CHECK: OpDecorate %userVertices DescriptorSet 0 +// CHECK: OpDecorate %userVertices Binding 0 +// CHECK: OpDecorate %userIndices DescriptorSet 0 +// CHECK: OpDecorate %userIndices Binding 1 +// CHECK: OpDecorate %meshes DescriptorSet 0 +// CHECK: OpDecorate %meshes Binding 2 +// CHECK: OpDecorate %submeshes DescriptorSet 0 +// CHECK: OpDecorate %submeshes Binding 3 +// CHECK: OpDecorate %UBO DescriptorSet 0 +// CHECK: OpDecorate %UBO Binding 4 + +// CHECK: OpMemberDecorate %UserVertex 0 Offset 0 +// CHECK: OpMemberDecorate %UserVertex 1 Offset 16 +// CHECK: OpMemberDecorate %UserVertex 2 Offset 32 +// CHECK: OpDecorate %_runtimearr_UserVertex ArrayStride 48 +// CHECK: OpMemberDecorate %type_RWStructuredBuffer_UserVertex 0 Offset 0 +// CHECK: OpDecorate %type_RWStructuredBuffer_UserVertex BufferBlock + +// CHECK: OpDecorate %_runtimearr_uint ArrayStride 4 +// CHECK: OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0 +// CHECK: OpDecorate %type_RWStructuredBuffer_uint 
BufferBlock + +// CHECK: OpMemberDecorate %Mesh 0 Offset 0 +// CHECK: OpMemberDecorate %Mesh 1 Offset 4 +// CHECK: OpMemberDecorate %Mesh 2 Offset 8 +// CHECK: OpDecorate %_runtimearr_Mesh ArrayStride 16 +// CHECK: OpMemberDecorate %type_RWStructuredBuffer_Mesh 0 Offset 0 +// CHECK: OpDecorate %type_RWStructuredBuffer_Mesh BufferBlock + +// CHECK: OpMemberDecorate %SubMesh 0 Offset 0 +// CHECK: OpMemberDecorate %SubMesh 1 Offset 4 +// CHECK: OpMemberDecorate %SubMesh 2 Offset 8 +// CHECK: OpMemberDecorate %SubMesh 3 Offset 12 +// CHECK: OpMemberDecorate %SubMesh 4 Offset 16 +// CHECK: OpDecorate %_runtimearr_SubMesh ArrayStride 144 +// CHECK: OpMemberDecorate %type_RWStructuredBuffer_SubMesh 0 Offset 0 +// CHECK: OpDecorate %type_RWStructuredBuffer_SubMesh BufferBlock + +// CHECK: OpMemberDecorate %type_UBO 0 Offset 0 +// CHECK: OpMemberDecorate %type_UBO 0 MatrixStride 16 +// CHECK: OpMemberDecorate %type_UBO 0 ColMajor +// CHECK: OpDecorate %type_UBO Block + +// CHECK: %UserVertex = OpTypeStruct %v3float %v2float %v3float +// CHECK: %_runtimearr_UserVertex = OpTypeRuntimeArray %UserVertex +// CHECK: %type_RWStructuredBuffer_UserVertex = OpTypeStruct %_runtimearr_UserVertex +// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_UserVertex = OpTypePointer Uniform %type_RWStructuredBuffer_UserVertex +[[vk::binding(0, 0)]] +RWStructuredBuffer userVertices; + +// CHECK: %_runtimearr_uint = OpTypeRuntimeArray %uint +// CHECK: %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint +// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_uint = OpTypePointer Uniform %type_RWStructuredBuffer_uint +[[vk::binding(1, 0)]] +RWStructuredBuffer userIndices; + +// CHECK: %_arr_uint_uint_2 = OpTypeArray %uint %uint_2 +// CHECK: %Mesh = OpTypeStruct %uint %uint %_arr_uint_uint_2 +// CHECK: %_runtimearr_Mesh = OpTypeRuntimeArray %Mesh +// CHECK: %type_RWStructuredBuffer_Mesh = OpTypeStruct %_runtimearr_Mesh +// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_Mesh = OpTypePointer Uniform 
%type_RWStructuredBuffer_Mesh +[[vk::binding(2, 0)]] +RWStructuredBuffer meshes; + +// CHECK: %uint_8 = OpConstant %uint 8 +// CHECK: %v4float = OpTypeVector %float 4 +// CHECK: %_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8 +// CHECK: %SubMesh = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8 +// CHECK: %_runtimearr_SubMesh = OpTypeRuntimeArray %SubMesh +// CHECK: %type_RWStructuredBuffer_SubMesh = OpTypeStruct %_runtimearr_SubMesh +// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_SubMesh = OpTypePointer Uniform %type_RWStructuredBuffer_SubMesh +[[vk::binding(3, 0)]] +RWStructuredBuffer submeshes; + +// CHECK: %mat4v4float = OpTypeMatrix %v4float 4 +// CHECK: %type_UBO = OpTypeStruct %mat4v4float +// CHECK: %_ptr_Uniform_type_UBO = OpTypePointer Uniform %type_UBO +[[vk::binding(4, 0)]] +cbuffer UBO { + row_major float4x4 mvp; +} + +struct PerVertex { + float4 position : SV_Position; + float2 texcoord : TEXCOORD; + float3 color : COLOR; +}; + +struct PerPrimitive { + float4 primcolor : PCOLOR; +}; + +struct SubMeshes { + uint submeshID[256] : SUBMESH; +}; + +static const uint vertsPerPrim = 3U; + +// CHECK: %userVertices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_UserVertex Uniform +// CHECK: %userIndices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_uint Uniform +// CHECK: %meshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_Mesh Uniform +// CHECK: %submeshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_SubMesh Uniform +// CHECK: %UBO = OpVariable %_ptr_Uniform_type_UBO Uniform + +[outputtopology("triangle")] +[numthreads(32, 1, 1)] +void main( + out indices uint3 primIndices[128], + out vertices PerVertex verts[128], + out primitives PerPrimitive prims[128], + in payload SubMeshes taskmem, + in uint gid : SV_GroupID, + in uint tid : SV_GroupThreadID + ) +{ + uint task = taskmem.submeshID[gid]; +// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function +// CHECK: OpAccessChain %_ptr_Uniform_SubMesh %submeshes %int_0 
[[task:%\d+]] +// CHECK: OpStore %submesh [[submeshVal:%\d+]] + SubMesh submesh = submeshes[task]; +// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_0 + uint numPackedVertices = submesh.vertexCount; +// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_2 + uint numPackedPrimitives = submesh.primitiveCount; + + SetMeshOutputCounts(numPackedVertices, numPackedPrimitives); + + for (uint i = 0U; i < numPackedVertices; i += 32U) { + uint vid = i + tid; +// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1 + uint svid = vid + submesh.vertexOffset; + if (vid >= numPackedVertices) continue; +// CHECK: OpAccessChain %_ptr_Uniform_v2float %userVertices %int_0 [[svid_1:%\d+]] %int_1 + verts[vid].texcoord = userVertices[svid].texcoord; +// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_2:%\d+]] %int_2 + verts[vid].color = userVertices[svid].color; +// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_0:%\d+]] %int_0 + float3 position = userVertices[svid].position; +// CHECK: OpAccessChain %_ptr_Uniform_mat4v4float %UBO %int_0 + verts[vid].position = mul(mvp, float4(position, 1.0)); + } + + GroupMemoryBarrier(); + + for (uint j = 0U; j < numPackedPrimitives; j += 32U) { + uint pid = j + tid; + uint didxoff = vertsPerPrim * pid; +// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_3 + uint sidxoff = submesh.indexOffset + didxoff; + if (pid >= numPackedPrimitives) continue; +// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_0:%\d+]] +// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_1:%\d+]] +// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_2:%\d+]] + primIndices[pid] = uint3(userIndices[sidxoff], userIndices[sidxoff+1], userIndices[sidxoff+2]); +// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1 +// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[ind:%\d+]] + uint providx = submesh.vertexOffset + userIndices[sidxoff 
+ vertsPerPrim - 1U]; +// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[providx:%\d+]] %int_2 + prims[pid].primcolor = float4(userVertices[providx].color, 1.0); + } +} + diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.hlsl new file mode 100644 index 000000000..ce8385772 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.nv.fncall.amplification.hlsl @@ -0,0 +1,135 @@ +// Run: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.1 + +// CHECK: OpCapability MeshShadingNV +// CHECK: OpExtension "SPV_NV_mesh_shader" +// CHECK: OpEntryPoint TaskNV %main "main" + +struct SubMesh { + uint vertexCount; + uint vertexOffset; + uint primitiveCount; + uint indexOffset; + float4 boundingBox[8]; +}; + +struct Mesh { + uint firstSubmesh; + uint submeshCount; + uint dummy[2]; +}; + +struct UserVertex { + float3 position; + float2 texcoord; + float3 color; +}; + +[[vk::binding(0, 0)]] +RWStructuredBuffer userVertices; + +[[vk::binding(1, 0)]] +RWStructuredBuffer userIndices; + +[[vk::binding(2, 0)]] +RWStructuredBuffer meshes; + +[[vk::binding(3, 0)]] +RWStructuredBuffer submeshes; + +[[vk::binding(4, 0)]] +cbuffer UBO { + row_major float4x4 mvp; +} + +groupshared uint passedSubmeshes; +struct SubMeshes { + uint submeshID[256] : SUBMESH; +}; +groupshared SubMeshes sharedSubMeshes; + +// CHECK: %_arr_v4float_uint_8_0 = OpTypeArray %v4float %uint_8 +// CHECK: %SubMesh_0 = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8_0 +// CHECK: %_ptr_Function_SubMesh_0 = OpTypePointer Function %SubMesh_0 +// CHECK: [[funcType:%\d+]] = OpTypeFunction %bool %_ptr_Function_SubMesh_0 + +bool TestSubmesh(SubMesh submesh) { + uint clip = 0x0U; + + for (uint bbv = 0U ; bbv < 8U; bbv++) { + float4 pos= mul(mvp, submesh.boundingBox[bbv]); + if (pos.x <= pos.w) clip |= 0x1U; + if (pos.y <= 0.3333 * pos.w) clip |= 0x2U; + if (pos.z <= pos.w) clip |= 0x4U; + if (pos.x >= 
-pos.w) clip |= 0x8U; + if (pos.y >= -pos.w) clip |= 0x10U; + if (pos.z >= -pos.w) clip |= 0x20U; + } + return (clip == 0x3FU); +} + +[numthreads(32, 1, 1)] +void main( + in uint tid : SV_GroupThreadID, + in uint mid : SV_GroupID + ) +{ + uint firstSubmesh = meshes[mid].firstSubmesh; + uint submeshCount = meshes[mid].submeshCount; + passedSubmeshes = 0U; + GroupMemoryBarrier(); + for (uint i = 0U; i < submeshCount; i += 32U) { + uint smid = firstSubmesh + i + tid; + if (smid >= firstSubmesh + submeshCount) continue; + +// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function +// CHECK: %passed = OpVariable %_ptr_Function_bool Function +// CHECK: %param_var_submesh = OpVariable %_ptr_Function_SubMesh_0 Function + SubMesh submesh = submeshes[smid]; + bool passed = true; + +// CHECK: [[submeshValue:%\d+]] = OpLoad %SubMesh_0 %submesh +// CHECK: OpStore %param_var_submesh [[submeshValue]] +// CHECK: [[rv:%\d+]] = OpFunctionCall %bool %TestSubmesh %param_var_submesh +// CHECK: [[cond:%\d+]] = OpLogicalNot %bool [[rv]] +// CHECK: OpSelectionMerge %if_merge_0 None +// CHECK: OpBranchConditional [[cond]] %if_true_0 %if_merge_0 +// CHECK: %if_true_0 = OpLabel +// CHECK: OpStore %passed %false +// CHECK: OpBranch %if_merge_0 +// CHECK: %if_merge_0 = OpLabel + if (!TestSubmesh(submesh)) passed = false; + + if (passed) { + uint ballot = WaveActiveBallot(passed).x; + uint laneMaskLT = (1 << WaveGetLaneIndex()) - 1; + uint lowerThreads = ballot & laneMaskLT; + uint slot = passedSubmeshes + WavePrefixCountBits(passed); + sharedSubMeshes.submeshID[slot] = smid; + if (lowerThreads == 0U) { + passedSubmeshes += WaveActiveCountBits(passed); + } + } + GroupMemoryBarrier(); + } + DispatchMesh(passedSubmeshes, 1, 1, sharedSubMeshes); +} + +/* bool TestSubmesh(SubMesh submesh) { ... 
} */ + +// CHECK: %TestSubmesh = OpFunction %bool None [[funcType]] +// CHECK: %submesh_0 = OpFunctionParameter %_ptr_Function_SubMesh_0 + +// CHECK: %bb_entry_0 = OpLabel + +// CHECK: %clip = OpVariable %_ptr_Function_uint Function +// CHECK: %bbv = OpVariable %_ptr_Function_uint Function +// CHECK: %pos = OpVariable %_ptr_Function_v4float Function + +// CHECK: %for_check_0 = OpLabel +// CHECK: %for_body_0 = OpLabel +// CHECK: %for_merge_0 = OpLabel + +// CHECK: [[clipValue:%\d+]] = OpLoad %uint %clip +// CHECK: [[retValue:%\d+]] = OpIEqual %bool [[clipValue]] %uint_63 +// CHECK: OpReturnValue [[retValue]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.error.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.error.hlsl new file mode 100644 index 000000000..4fbfa7556 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.error.hlsl @@ -0,0 +1,19 @@ +// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays + +// CHECK: error: ran into binding number conflict when assigning binding number 3 in set 0 + +Texture2D MyTextures[5] : register(t0); // Forced use of binding numbers 0, 1, 2, 3, 4. +Texture2D AnotherTexture : register(t3); // Error: Forced use of binding number 3. 
+SamplerState MySampler; + +float4 main(float2 TexCoord : TexCoord) : SV_Target0 { + float4 result = + MyTextures[0].Sample(MySampler, TexCoord) + + MyTextures[1].Sample(MySampler, TexCoord) + + MyTextures[2].Sample(MySampler, TexCoord) + + MyTextures[3].Sample(MySampler, TexCoord) + + MyTextures[4].Sample(MySampler, TexCoord) + + AnotherTexture.Sample(MySampler, TexCoord); + return result; +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1-optimized.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1-optimized.hlsl new file mode 100644 index 000000000..e78010849 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1-optimized.hlsl @@ -0,0 +1,36 @@ +// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3 + +// CHECK: OpDecorate %AnotherTexture Binding 5 +// CHECK: OpDecorate %NextTexture Binding 6 +// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 0 +// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 1 +// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 2 +// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 3 +// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 4 +// CHECK: OpDecorate [[MySamplers0:%\d+]] Binding 7 +// CHECK: OpDecorate [[MySamplers1:%\d+]] Binding 8 + +// CHECK: [[MyTextures0]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures1]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures2]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures3]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures4]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MySamplers0]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +// CHECK: [[MySamplers1]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant +Texture2D MyTextures[5] : register(t0); +Texture2D NextTexture; // This is 
supposed to be t6. +Texture2D AnotherTexture : register(t5); +SamplerState MySamplers[2]; + +float4 main(float2 TexCoord : TexCoord) : SV_Target0 +{ + float4 result = + MyTextures[0].Sample(MySamplers[0], TexCoord) + + MyTextures[1].Sample(MySamplers[0], TexCoord) + + MyTextures[2].Sample(MySamplers[0], TexCoord) + + MyTextures[3].Sample(MySamplers[1], TexCoord) + + MyTextures[4].Sample(MySamplers[1], TexCoord) + + AnotherTexture.Sample(MySamplers[1], TexCoord) + + NextTexture.Sample(MySamplers[1], TexCoord); + return result; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1.hlsl new file mode 100644 index 000000000..84933fc77 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example1.hlsl @@ -0,0 +1,23 @@ +// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays + +// CHECK: OpDecorate %MyTextures Binding 0 +// CHECK: OpDecorate %AnotherTexture Binding 5 +// CHECK: OpDecorate %NextTexture Binding 6 +// CHECK: OpDecorate %MySamplers Binding 7 +Texture2D MyTextures[5] : register(t0); +Texture2D NextTexture; // This is supposed to be t6. 
+Texture2D AnotherTexture : register(t5); +SamplerState MySamplers[2]; + +float4 main(float2 TexCoord : TexCoord) : SV_Target0 +{ + float4 result = + MyTextures[0].Sample(MySamplers[0], TexCoord) + + MyTextures[1].Sample(MySamplers[0], TexCoord) + + MyTextures[2].Sample(MySamplers[0], TexCoord) + + MyTextures[3].Sample(MySamplers[1], TexCoord) + + MyTextures[4].Sample(MySamplers[1], TexCoord) + + AnotherTexture.Sample(MySamplers[1], TexCoord) + + NextTexture.Sample(MySamplers[1], TexCoord); + return result; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2-optimized.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2-optimized.hlsl new file mode 100644 index 000000000..915e071be --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2-optimized.hlsl @@ -0,0 +1,41 @@ +// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3 + +// CHECK: OpDecorate %AnotherTexture Binding 3 +// CHECK: OpDecorate %MySampler Binding 2 +// CHECK: OpDecorate %MySampler2 Binding 9 +// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 4 +// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 5 +// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 6 +// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 7 +// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 8 +// CHECK: OpDecorate [[MyTextures20:%\d+]] Binding 0 +// CHECK: OpDecorate [[MyTextures21:%\d+]] Binding 1 + +// CHECK: [[MyTextures0:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures1:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures2:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures3:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures4:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures20:%\d+]] = OpVariable 
%_ptr_UniformConstant_type_2d_image UniformConstant +// CHECK: [[MyTextures21:%\d+]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + +Texture2D MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8]. +Texture2D AnotherTexture : register(t3); // force binding number 3. +Texture2D MyTextures2[2]; // take binding slot 0 and 1. +SamplerState MySampler; // take binding slot 2. +SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9. + +float4 main(float2 TexCoord : TexCoord) : SV_Target0 +{ + float4 result = + MyTextures[0].Sample(MySampler, TexCoord) + + MyTextures[1].Sample(MySampler, TexCoord) + + MyTextures[2].Sample(MySampler, TexCoord) + + MyTextures[3].Sample(MySampler, TexCoord) + + MyTextures[4].Sample(MySampler, TexCoord) + + MyTextures2[0].Sample(MySampler2, TexCoord) + + MyTextures2[1].Sample(MySampler2, TexCoord) + + AnotherTexture.Sample(MySampler, TexCoord); + return result; +} + diff --git a/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2.hlsl b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2.hlsl new file mode 100644 index 000000000..1be4f285a --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.binding.cl.flatten-arrays.example2.hlsl @@ -0,0 +1,29 @@ +// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays + + +// CHECK: OpDecorate %AnotherTexture Binding 3 +// CHECK: OpDecorate %MyTextures Binding 4 +// CHECK: OpDecorate %MyTextures2 Binding 0 +// CHECK: OpDecorate %MySampler Binding 2 +// CHECK: OpDecorate %MySampler2 Binding 9 + +Texture2D MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8]. +Texture2D AnotherTexture : register(t3); // force binding number 3. +Texture2D MyTextures2[2]; // take binding slot 0 and 1. +SamplerState MySampler; // take binding slot 2. +SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9. 
+ +float4 main(float2 TexCoord : TexCoord) : SV_Target0 +{ + float4 result = + MyTextures[0].Sample(MySampler, TexCoord) + + MyTextures[1].Sample(MySampler, TexCoord) + + MyTextures[2].Sample(MySampler, TexCoord) + + MyTextures[3].Sample(MySampler, TexCoord) + + MyTextures[4].Sample(MySampler, TexCoord) + + MyTextures2[0].Sample(MySampler2, TexCoord) + + MyTextures2[1].Sample(MySampler2, TexCoord) + + AnotherTexture.Sample(MySampler, TexCoord); + return result; +} + diff --git a/tools/clang/unittests/HLSL/DxilModuleTest.cpp b/tools/clang/unittests/HLSL/DxilModuleTest.cpp index d2b4a736d..24a1e2ebd 100644 --- a/tools/clang/unittests/HLSL/DxilModuleTest.cpp +++ b/tools/clang/unittests/HLSL/DxilModuleTest.cpp @@ -62,6 +62,10 @@ public: TEST_METHOD(Precise6) TEST_METHOD(Precise7) + TEST_METHOD(CSGetNumThreads) + TEST_METHOD(MSGetNumThreads) + TEST_METHOD(ASGetNumThreads) + TEST_METHOD(SetValidatorVersion) void VerifyValidatorVersionFails( @@ -435,6 +439,64 @@ TEST_F(DxilModuleTest, Precise7) { VERIFY_ARE_EQUAL(numChecks, 4); } +TEST_F(DxilModuleTest, CSGetNumThreads) { + Compiler c(m_dllSupport); + c.Compile( + "[numthreads(8, 4, 2)]\n" + "void main() {\n" + "}\n" + , + L"cs_6_0" + ); + + DxilModule &DM = c.GetDxilModule(); + VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0)); + VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1)); + VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2)); +} + +TEST_F(DxilModuleTest, MSGetNumThreads) { + Compiler c(m_dllSupport); + if (c.SkipDxil_Test(1,5)) return; + c.Compile( + "struct MeshPerVertex { float4 pos : SV_Position; };\n" + "[numthreads(8, 4, 2)]\n" + "[outputtopology(\"triangle\")]\n" + "void main(\n" + " out indices uint3 primIndices[1]\n" + ") {\n" + " SetMeshOutputCounts(0, 0);\n" + "}\n" + , + L"ms_6_5" + ); + + DxilModule &DM = c.GetDxilModule(); + VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0)); + VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1)); + VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2)); +} + +TEST_F(DxilModuleTest, ASGetNumThreads) { + Compiler c(m_dllSupport); + 
if (c.SkipDxil_Test(1,5)) return; + c.Compile( + "struct Payload { uint i; };\n" + "[numthreads(8, 4, 2)]\n" + "void main() {\n" + " Payload pld = {0};\n" + " DispatchMesh(1, 1, 1, pld);\n" + "}\n" + , + L"as_6_5" + ); + + DxilModule &DM = c.GetDxilModule(); + VERIFY_ARE_EQUAL(8, DM.GetNumThreads(0)); + VERIFY_ARE_EQUAL(4, DM.GetNumThreads(1)); + VERIFY_ARE_EQUAL(2, DM.GetNumThreads(2)); +} + void DxilModuleTest::VerifyValidatorVersionFails( LPCWSTR shaderModel, const std::vector &arguments, const std::vector &expectedErrors) { diff --git a/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp b/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp index 035cf30b1..7d4924ccd 100644 --- a/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp +++ b/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp @@ -1620,6 +1620,23 @@ TEST_F(FileTest, VulkanRegisterBinding1to1MappingAssociatedCounter) { runFileTest("vk.binding.cl.register.counter.hlsl", Expect::Failure); } +// For flattening array of resources +TEST_F(FileTest, FlattenResourceArrayBindings1) { + runFileTest("vk.binding.cl.flatten-arrays.example1.hlsl"); +} +TEST_F(FileTest, FlattenResourceArrayBindings1Optimized) { + runFileTest("vk.binding.cl.flatten-arrays.example1-optimized.hlsl"); +} +TEST_F(FileTest, FlattenResourceArrayBindings2) { + runFileTest("vk.binding.cl.flatten-arrays.example2.hlsl"); +} +TEST_F(FileTest, FlattenResourceArrayBindings2Optimized) { + runFileTest("vk.binding.cl.flatten-arrays.example2-optimized.hlsl"); +} +TEST_F(FileTest, FlattenResourceArrayBindingsOverlapError) { + runFileTest("vk.binding.cl.flatten-arrays.error.hlsl", Expect::Failure); +} + // For testing the "-auto-binding-space" command line option which specifies the // "default space" for resources. 
TEST_F(FileTest, VulkanRegisterBindingDefaultSpaceImplicit) { @@ -2044,6 +2061,9 @@ TEST_F(FileTest, MeshShadingNVMeshLine) { TEST_F(FileTest, MeshShadingNVMeshPoint) { runFileTest("meshshading.nv.point.mesh.hlsl"); } +TEST_F(FileTest, MeshShadingNVMeshBuffer) { + runFileTest("meshshading.nv.buffer.mesh.hlsl"); +} TEST_F(FileTest, MeshShadingNVMeshError1) { runFileTest("meshshading.nv.error1.mesh.hlsl", Expect::Failure); } @@ -2089,6 +2109,10 @@ TEST_F(FileTest, MeshShadingNVMeshError14) { TEST_F(FileTest, MeshShadingNVAmplification) { runFileTest("meshshading.nv.amplification.hlsl"); } +TEST_F(FileTest, MeshShadingNVAmplificationFunCall) { + useVulkan1p1(); + runFileTest("meshshading.nv.fncall.amplification.hlsl"); +} TEST_F(FileTest, MeshShadingNVAmplificationError1) { runFileTest("meshshading.nv.error1.amplification.hlsl", Expect::Failure); }