Merge remote-tracking branch 'ms/master' into sep-reflect

This commit is contained in:
Tex Riddell 2019-08-19 00:43:25 -07:00
Родитель 892765cc4b fd4c08a10e
Коммит 0f23b6946c
28 изменённых файлов: 912 добавлений и 101 удалений

Просмотреть файл

@ -531,6 +531,13 @@ if (UNIX AND
append("-fcolor-diagnostics" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# HLSL Change Starts
# Enable -fms-extensions for clang to use MS uuid extensions for COM.
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
append("-fms-extensions -Wno-language-extension-token" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# HLSL Change Ends
# Add flags for add_dead_strip().
# FIXME: With MSVS, consider compiling with /Gy and linking with /OPT:REF?
# But MinSizeRel seems to add that automatically, so maybe disable these

Просмотреть файл

@ -3104,14 +3104,14 @@ Callable Stage
Mesh and Amplification Shaders
------------------------------
DirectX adds 2 new shader stages for using MeshShading pipeline namely Mesh and Amplification.
Amplification shaders corresponds to Task Shaders in Vulkan.
| DirectX adds 2 new shader stages for using MeshShading pipeline namely Mesh and Amplification.
| Amplification shaders correspond to Task Shaders in Vulkan.
|
| Refer to following HLSL and SPIR-V specs for details:
| https://docs.microsoft.com/<TBD>
| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc
This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
|
| This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
Entry Point Attributes
~~~~~~~~~~~~~~~~~~~~~~
@ -3120,18 +3120,19 @@ shaders and are translated to SPIR-V execution modes according to the table belo
.. table:: Mapping from HLSL attribute to SPIR-V execution mode
+--------------------+----------------+-------------------------+
| HLSL Attribute | Value | SPIR-V Execution Mode |
+====================+================+=========================+
| | ``point`` | ``OutputPoints`` |
| +----------------+-------------------------+
| ``outputtopology`` | ``line`` | ``OutputLinesNV`` |
| (Mesh shader) +----------------+-------------------------+
| | ``triangle`` | ``OutputTrianglesNV`` |
+--------------------+----------------+-------------------------+
| ``numthreads`` | ``X, Y, Z`` | ``LocalSize X, Y, Z`` |
| | (X*Y*Z <= 128) | |
+--------------------+----------------+-------------------------+
+-------------------+--------------------+-------------------------+
| HLSL Attribute | Value | SPIR-V Execution Mode |
+===================+====================+=========================+
|``outputtopology`` | ``point`` | ``OutputPoints`` |
| +--------------------+-------------------------+
|``(Mesh shader)`` | ``line`` | ``OutputLinesNV`` |
| +--------------------+-------------------------+
| | ``triangle`` | ``OutputTrianglesNV`` |
+-------------------+--------------------+-------------------------+
| ``numthreads`` | ``X, Y, Z`` | ``LocalSize X, Y, Z`` |
| | | |
| | ``(X*Y*Z <= 128)`` | |
+-------------------+--------------------+-------------------------+
Intrinsics
~~~~~~~~~~
@ -3140,24 +3141,29 @@ and are translated to SPIR-V intrinsics according to the table below:
.. table:: Mapping from HLSL intrinsics to SPIR-V intrinsics
+-------------------------+--------------------+-----------------------------------------+
| HLSL Intrinsic | Parameters | SPIR-V Intrinsic |
+=========================+====================+=========================================+
| ``SetMeshOutputCounts`` | ``numVertices`` | ``PrimitiveCountNV numPrimitives`` |
| (Mesh shader) | ``numPrimitives`` | |
+-------------------------+--------------------+-----------------------------------------+
| | ``ThreadX`` | |
| ``DispatchMesh`` | ``ThreadY`` | ``OpControlBarrier`` |
| (Amplification shader) | ``ThreadZ`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
| | ``MeshPayload`` | |
+-------------------------+--------------------+-----------------------------------------+
+---------------------------+--------------------+-----------------------------------------+
| HLSL Intrinsic | Parameters | SPIR-V Intrinsic |
+===========================+====================+=========================================+
| ``SetMeshOutputCounts`` | ``numVertices`` | ``PrimitiveCountNV numPrimitives`` |
| | | |
| ``(Mesh shader)`` | ``numPrimitives`` | |
+---------------------------+--------------------+-----------------------------------------+
| ``DispatchMesh`` | ``ThreadX`` | ``OpControlBarrier`` |
| | | |
| ``(Amplification shader)``| ``ThreadY`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
| | | |
| | ``ThreadZ`` | |
| | | |
| | ``MeshPayload`` | |
+---------------------------+--------------------+-----------------------------------------+
| *For DispatchMesh intrinsic, we also emit MeshPayload as output block with PerTaskNV decoration
| Note : For ``DispatchMesh`` intrinsic, we also emit ``MeshPayload`` as output block with ``PerTaskNV`` decoration
Mesh Interface Variables
~~~~~~~~~~~~~~~~~~~~~~~~
Interface variables are defined for Mesh shaders using HLSL modifiers.
Following table gives high level overview of the mapping:
| Interface variables are defined for Mesh shaders using HLSL modifiers.
| The following table gives a high-level overview of the mapping:
|
.. table:: Mapping from HLSL modifiers to SPIR-V definitions
@ -3165,9 +3171,11 @@ Following table gives high level overview of the mapping:
| HLSL modifier | SPIR-V definition |
+=================+=========================================================================+
| ``indices`` | Maps to SPIR-V intrinsic ``PrimitiveIndicesNV`` |
| | |
| | Defines SPIR-V Execution Mode ``OutputPrimitivesNV <array-size>`` |
+-----------------+-------------------------------------------------------------------------+
| ``vertices`` | Maps to per-vertex out attributes |
| | |
| | Defines existing SPIR-V Execution Mode ``OutputVertices <array-size>`` |
+-----------------+-------------------------------------------------------------------------+
| ``primitives`` | Maps to per-primitive out attributes with ``PerPrimitiveNV`` decoration |
@ -3395,6 +3403,13 @@ codegen for Vulkan:
- ``-fspv-target-env=<env>``: Specifies the target environment for this compilation.
The current valid options are ``vulkan1.0`` and ``vulkan1.1``. If no target
environment is provided, ``vulkan1.0`` is used as default.
- ``-fspv-flatten-resource-arrays``: Flattens arrays of textures and samplers
into individual resources, each taking one binding number. For example, an
array of 3 textures will become 3 texture resources taking 3 binding numbers.
This makes the behavior similar to DX. Without this option, you would get 1
array object taking 1 binding number. Note that arrays of
{RW|Append|Consume}StructuredBuffers are currently not supported in the
SPIR-V backend.
- ``-Wno-vk-ignored-features``: Does not emit warnings on ignored features
resulting from no Vulkan support, e.g., cbuffer member initializer.

2
external/SPIRV-Tools поставляемый

@ -1 +1 @@
Subproject commit aa9e8f538041db3055ea443080e0ccc315fa114f
Subproject commit bbd80462f5c89e9a225edabaca1215032c62e459

Просмотреть файл

@ -225,7 +225,7 @@ public:
// This function must be called after unused resources are removed from DxilModule
bool ModuleHasMulticomponentUAVLoads();
// Compute shader.
// Compute/Mesh/Amplification shader.
void SetNumThreads(unsigned x, unsigned y, unsigned z);
unsigned GetNumThreads(unsigned idx) const;

Просмотреть файл

@ -281,6 +281,8 @@ def fspv_extension_EQ : Joined<["-"], "fspv-extension=">, Group<spirv_Group>, Fl
HelpText<"Specify SPIR-V extension permitted to use">;
def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
HelpText<"Flatten arrays of resources so each array element takes one binding number">;
def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,
HelpText<"Do not emit warnings for ingored features resulting from no Vulkan support">;
def Wno_vk_emulated_features : Joined<["-"], "Wno-vk-emulated-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,

Просмотреть файл

@ -53,6 +53,7 @@ struct SpirvCodeGenOptions {
bool useDxLayout;
bool useGlLayout;
bool useScalarLayout;
bool flattenResourceArrays;
SpirvLayoutRule cBufferLayoutRule;
SpirvLayoutRule sBufferLayoutRule;
SpirvLayoutRule tBufferLayoutRule;

Просмотреть файл

@ -46,16 +46,33 @@
#define CoTaskMemFree free
#define SysFreeString free
#define SysAllocStringLen(ptr, size) (wchar_t*)realloc(ptr, (size + 1)*sizeof(wchar_t))
#define SysAllocStringLen(ptr, size) \
(wchar_t *)realloc(ptr, (size + 1) * sizeof(wchar_t))
#define ARRAYSIZE(array) (sizeof(array) / sizeof(array[0]))
#define _countof(a) (sizeof(a) / sizeof(*(a)))
// Decide whether the 'uuid' facility must be emulated. Emulation is enabled
// unconditionally when __APPLE__ is defined, and otherwise when compiling with
// GCC proper (__GNUC__ defined but __clang__ not defined), which has no UUID
// support.
#ifdef __APPLE__
#define __EMULATE_UUID 1
#else // __APPLE__
#ifdef __GNUC__
#ifndef __clang__
#define __EMULATE_UUID 1
#endif // __clang__
#endif // __GNUC__
#endif // __APPLE__
#ifdef __EMULATE_UUID
#define __declspec(x)
#endif // __EMULATE_UUID
#define DECLSPEC_SELECTANY
#ifdef __EMULATE_UUID
#define uuid(id)
#endif // __EMULATE_UUID
#define STDMETHODCALLTYPE
#define STDAPI extern "C" HRESULT STDAPICALLTYPE
@ -188,7 +205,8 @@
#define OutputDebugStringA(msg) fputs(msg, stderr)
#define OutputDebugFormatA(...) fprintf(stderr, __VA_ARGS__)
#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace, BackTraceHash)\
#define CaptureStackBackTrace(FramesToSkip, FramesToCapture, BackTrace, \
BackTraceHash) \
backtrace(BackTrace, FramesToCapture)
// Event Tracing for Windows (ETW) provides application programmers the ability
@ -413,19 +431,55 @@ typedef void *HMODULE;
//===--------------------- ID Types and Macros for COM --------------------===//
struct GUID {
#ifdef __EMULATE_UUID
struct GUID
#else // __EMULATE_UUID
// These specific definitions are required by clang -fms-extensions.
typedef struct _GUID
#endif // __EMULATE_UUID
{
uint32_t Data1;
uint16_t Data2;
uint16_t Data3;
uint8_t Data4[8];
};
}
#ifdef __EMULATE_UUID
;
#else // __EMULATE_UUID
GUID;
#endif // __EMULATE_UUID
typedef GUID CLSID;
typedef const GUID &REFGUID;
typedef const void *REFIID;
typedef const GUID &REFCLSID;
#ifdef __EMULATE_UUID
typedef const void *REFIID;
#define IsEqualIID(a, b) a == b
#define IsEqualCLSID(a, b) !memcmp(&a, &b, sizeof(GUID))
#else // __EMULATE_UUID
typedef GUID IID;
typedef IID *LPIID;
typedef const IID &REFIID;
// Byte-wise equality of two GUIDs; the basis for every comparison below.
inline bool IsEqualGUID(REFGUID rguid1, REFGUID rguid2) {
  const int difference = memcmp(&rguid1, &rguid2, sizeof(GUID));
  return difference == 0;
}

// Equality operator for GUIDs, expressed through IsEqualGUID.
inline bool operator==(REFGUID guidOne, REFGUID guidOther) {
  return IsEqualGUID(guidOne, guidOther);
}

// Inequality operator for GUIDs; the exact negation of the equality test.
inline bool operator!=(REFGUID guidOne, REFGUID guidOther) {
  return !IsEqualGUID(guidOne, guidOther);
}

// Interface IDs are GUIDs, so comparing them is a GUID comparison.
inline bool IsEqualIID(REFIID riid1, REFIID riid2) {
  const bool same = IsEqualGUID(riid1, riid2);
  return same;
}

// Class IDs are GUIDs, so comparing them is a GUID comparison.
inline bool IsEqualCLSID(REFCLSID rclsid1, REFCLSID rclsid2) {
  const bool same = IsEqualGUID(rclsid1, rclsid2);
  return same;
}
#endif // __EMULATE_UUID
//===--------------------- Struct Types -----------------------------------===//
@ -503,22 +557,37 @@ enum tagSTATFLAG {
//===--------------------- UUID Related Macros ----------------------------===//
#ifdef __EMULATE_UUID
// The following macros are defined to compensate for the lack of 'uuid' on Linux.
#define DECLARE_CROSS_PLATFORM_UUIDOF(T) \
public: \
static REFIID uuidof() { return static_cast<REFIID>(&T##_ID); } \
\
private: \
__attribute__ ((visibility ("default"))) static const char T##_ID;
__attribute__((visibility("default"))) static const char T##_ID;
#define DEFINE_CROSS_PLATFORM_UUIDOF(T) __attribute__ ((visibility ("default"))) const char T::T##_ID = '\0';
#define DEFINE_CROSS_PLATFORM_UUIDOF(T) \
__attribute__((visibility("default"))) const char T::T##_ID = '\0';
#define __uuidof(T) T::uuidof()
#define IID_PPV_ARGS(ppType) \
(**(ppType)).uuidof(), reinterpret_cast<void **>(ppType)
#else // __EMULATE_UUID
#define DECLARE_CROSS_PLATFORM_UUIDOF(T)
#define DEFINE_CROSS_PLATFORM_UUIDOF(T)
// Reinterprets a typed pointer-to-pointer (T **) as void **. Used by the
// non-emulated IID_PPV_ARGS macro to produce its second argument.
template <typename T> inline void **IID_PPV_ARGS_Helper(T **pp) {
return reinterpret_cast<void **>(pp);
}
#define IID_PPV_ARGS(ppType) __uuidof(**(ppType)), IID_PPV_ARGS_Helper(ppType)
#endif // __EMULATE_UUID
//===--------------------- COM Interfaces ---------------------------------===//
struct IUnknown {
struct __declspec(uuid("00000000-0000-0000-C000-000000000046")) IUnknown {
virtual HRESULT QueryInterface(REFIID riid, void **ppvObject) = 0;
virtual ULONG AddRef();
virtual ULONG Release();
@ -533,25 +602,29 @@ private:
DECLARE_CROSS_PLATFORM_UUIDOF(IUnknown)
};
struct INoMarshal : public IUnknown {
struct __declspec(uuid("ECC8691B-C1DB-4DC0-855E-65F6C551AF49")) INoMarshal
: public IUnknown {
DECLARE_CROSS_PLATFORM_UUIDOF(INoMarshal)
};
struct IMalloc : public IUnknown {
struct __declspec(uuid("00000002-0000-0000-C000-000000000046")) IMalloc
: public IUnknown {
virtual void *Alloc(size_t size);
virtual void *Realloc(void *ptr, size_t size);
virtual void Free(void *ptr);
virtual HRESULT QueryInterface(REFIID riid, void **ppvObject);
};
struct ISequentialStream : public IUnknown {
struct __declspec(uuid("0C733A30-2A1C-11CE-ADE5-00AA0044773D"))
ISequentialStream : public IUnknown {
virtual HRESULT Read(void *pv, ULONG cb, ULONG *pcbRead) = 0;
virtual HRESULT Write(const void *pv, ULONG cb, ULONG *pcbWritten) = 0;
DECLARE_CROSS_PLATFORM_UUIDOF(ISequentialStream)
};
struct IStream : public ISequentialStream {
struct __declspec(uuid("0000000c-0000-0000-C000-000000000046")) IStream
: public ISequentialStream {
virtual HRESULT Seek(LARGE_INTEGER dlibMove, DWORD dwOrigin,
ULARGE_INTEGER *plibNewPosition) = 0;
virtual HRESULT SetSize(ULARGE_INTEGER libNewSize) = 0;

Просмотреть файл

@ -366,24 +366,30 @@ void DxilModule::CollectShaderFlagsForModule() {
}
void DxilModule::SetNumThreads(unsigned x, unsigned y, unsigned z) {
DXASSERT(m_DxilEntryPropsMap.size() == 1 && m_pSM->IsCS(),
"only works for CS profile");
DXASSERT(m_DxilEntryPropsMap.size() == 1 &&
(m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()),
"only works for CS/MS/AS profiles");
DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props;
DXASSERT(props.IsCS(), "Must be CS profile");
unsigned *numThreads = props.ShaderProps.CS.numThreads;
DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind);
unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads :
props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads;
numThreads[0] = x;
numThreads[1] = y;
numThreads[2] = z;
}
unsigned DxilModule::GetNumThreads(unsigned idx) const {
DXASSERT(m_DxilEntryPropsMap.size() == 1 &&
(m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()),
"only works for CS/MS/AS profiles");
DXASSERT(idx < 3, "Thread dimension index must be 0-2");
if (!m_pSM->IsCS())
return 0;
DXASSERT(m_DxilEntryPropsMap.size() == 1, "should have one entry prop");
__analysis_assume(idx < 3);
if (!(m_pSM->IsCS() || m_pSM->IsMS() || m_pSM->IsAS()))
return 0;
const DxilFunctionProps &props = m_DxilEntryPropsMap.begin()->second->props;
DXASSERT(props.IsCS(), "Must be CS profile");
return props.ShaderProps.CS.numThreads[idx];
DXASSERT_NOMSG(m_pSM->GetKind() == props.shaderKind);
const unsigned *numThreads = props.IsCS() ? props.ShaderProps.CS.numThreads :
props.IsMS() ? props.ShaderProps.MS.numThreads : props.ShaderProps.AS.numThreads;
return numThreads[idx];
}
DXIL::InputPrimitive DxilModule::GetInputPrimitive() const {

Просмотреть файл

@ -717,6 +717,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
opts.SpirvOptions.enableReflect = Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false);
opts.SpirvOptions.noWarnIgnoredFeatures = Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false);
opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false);
opts.SpirvOptions.flattenResourceArrays =
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false);
if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) ||
!handleVkShiftArgs(Args, OPT_fvk_t_shift, "t", &opts.SpirvOptions.tShift, errors) ||
@ -791,6 +793,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
Args.hasFlag(OPT_fvk_use_gl_layout, OPT_INVALID, false) ||
Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) ||
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) ||
Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) ||
Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) ||

Просмотреть файл

@ -39,6 +39,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Verifier.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MemoryBuffer.h"
@ -3870,6 +3871,12 @@ static void ValidateTypeAnnotation(ValidationContext &ValCtx) {
}
}
// Runs llvm::verifyModule on the module under validation, directing verifier
// output to the context's diagnostic stream. A true result from the verifier
// is reported as a violation of the BitcodeValid rule.
static void ValidateBitcode(ValidationContext &ValCtx) {
  const bool verifierFlaggedModule =
      llvm::verifyModule(ValCtx.M, &ValCtx.DiagStream());
  if (!verifierFlaggedModule)
    return;
  ValCtx.EmitError(ValidationRule::BitcodeValid);
}
static void ValidateMetadata(ValidationContext &ValCtx) {
Module *pModule = &ValCtx.M;
const std::string &target = pModule->getTargetTriple();
@ -5626,6 +5633,8 @@ ValidateDxilModule(llvm::Module *pModule, llvm::Module *pDebugModule) {
ValidationContext ValCtx(*pModule, pDebugModule, *pDxilModule, DiagPrinter);
ValidateBitcode(ValCtx);
ValidateMetadata(ValCtx);
ValidateShaderState(ValCtx);

Просмотреть файл

@ -1981,7 +1981,7 @@ Value *TranslateFrexp(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
// bool ne = val != 0;
Value *notZero = Builder.CreateFCmpUNE(val, zeroVal);
notZero = Builder.CreateZExt(notZero, dstTy);
notZero = Builder.CreateSExt(notZero, dstTy);
Value *intVal = Builder.CreateBitCast(val, dstTy);
// temp = intVal & exponentMask;

Просмотреть файл

@ -3246,11 +3246,14 @@ static void ReplaceConstantWithInst(Constant *C, Value *V, IRBuilder<> &Builder)
C->removeDeadConstantUsers();
}
static void ReplaceUnboundedArrayUses(Value *V, Value *Src, IRBuilder<> &Builder) {
static void ReplaceUnboundedArrayUses(Value *V, Value *Src) {
for (auto it = V->user_begin(); it != V->user_end(); ) {
User *U = *(it++);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
SmallVector<Value *, 4> idxList(GEP->idx_begin(), GEP->idx_end());
// Must set the insert point to the GEP itself (instead of the memcpy),
// because the indices might not dominate the memcpy.
IRBuilder<> Builder(GEP);
Value *NewGEP = Builder.CreateGEP(Src, idxList);
GEP->replaceAllUsesWith(NewGEP);
} else if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
@ -3392,8 +3395,7 @@ static void ReplaceMemcpy(Value *V, Value *Src, MemCpyInst *MC,
}
} else {
DXASSERT(IsUnboundedArrayMemcpy(TyV, TySrc), "otherwise mismatched types in memcpy are not unbounded array");
IRBuilder<> Builder(MC);
ReplaceUnboundedArrayUses(V, Src, Builder);
ReplaceUnboundedArrayUses(V, Src);
}
}

Просмотреть файл

@ -716,7 +716,7 @@ SpirvVariable *DeclResultIdMapper::createExternVar(const VarDecl *var) {
const auto *bindingAttr = var->getAttr<VKBindingAttr>();
const auto *counterBindingAttr = var->getAttr<VKCounterBindingAttr>();
resourceVars.emplace_back(varInstr, loc, regAttr, bindingAttr,
resourceVars.emplace_back(varInstr, var, loc, regAttr, bindingAttr,
counterBindingAttr);
if (const auto *inputAttachment = var->getAttr<VKInputAttachmentIndexAttr>())
@ -846,7 +846,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const HLSLBufferDecl *decl) {
astDecls[varDecl] = DeclSpirvInfo(bufferVar, index++);
}
resourceVars.emplace_back(
bufferVar, decl->getLocation(), getResourceBinding(decl),
bufferVar, decl, decl->getLocation(), getResourceBinding(decl),
decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
return bufferVar;
@ -890,7 +890,7 @@ SpirvVariable *DeclResultIdMapper::createCTBuffer(const VarDecl *decl) {
// We register the VarDecl here.
astDecls[decl] = DeclSpirvInfo(bufferVar);
resourceVars.emplace_back(
bufferVar, decl->getLocation(), getResourceBinding(context),
bufferVar, decl, decl->getLocation(), getResourceBinding(context),
decl->getAttr<VKBindingAttr>(), decl->getAttr<VKCounterBindingAttr>());
return bufferVar;
@ -970,8 +970,8 @@ void DeclResultIdMapper::createGlobalsCBuffer(const VarDecl *var) {
context, /*arraySize*/ 0, ContextUsageKind::Globals, "type.$Globals",
"$Globals");
resourceVars.emplace_back(globals, SourceLocation(), nullptr, nullptr,
nullptr, /*isCounterVar*/ false,
resourceVars.emplace_back(globals, /*decl*/ nullptr, SourceLocation(),
nullptr, nullptr, nullptr, /*isCounterVar*/ false,
/*isGlobalsCBuffer*/ true);
uint32_t index = 0;
@ -1089,7 +1089,7 @@ void DeclResultIdMapper::createCounterVar(
if (!isAlias) {
// Non-alias counter variables should be put in to resourceVars so that
// descriptors can be allocated for them.
resourceVars.emplace_back(counterInstr, decl->getLocation(),
resourceVars.emplace_back(counterInstr, decl, decl->getLocation(),
getResourceBinding(decl),
decl->getAttr<VKBindingAttr>(),
decl->getAttr<VKCounterBindingAttr>(), true);
@ -1213,26 +1213,63 @@ private:
/// set and binding number.
class BindingSet {
public:
/// Uses the given set and binding number.
void useBinding(uint32_t binding, uint32_t set) {
usedBindings[set].insert(binding);
/// Uses the given set and binding number. Returns false if the binding number
/// was already occupied in the set, and returns true otherwise.
bool useBinding(uint32_t binding, uint32_t set) {
  // insert() yields an (iterator, inserted) pair; the flag is false when the
  // binding number was already present in this set.
  return usedBindings[set].insert(binding).second;
}
/// Uses the next avaiable binding number in set 0.
uint32_t useNextBinding(uint32_t set) {
/// Uses the next available binding number in |set|. If more than one binding
/// number is to be occupied, it finds the next available chunk that can fit
/// |numBindingsToUse| in the |set|.
uint32_t useNextBinding(uint32_t set, uint32_t numBindingsToUse = 1) {
uint32_t bindingNoStart = getNextBindingChunk(set, numBindingsToUse);
auto &binding = usedBindings[set];
auto &next = nextBindings[set];
while (binding.count(next))
++next;
binding.insert(next);
return next++;
for (uint32_t i = 0; i < numBindingsToUse; ++i)
binding.insert(bindingNoStart + i);
return bindingNoStart;
}
/// Returns the first available binding number in the |set| for which |n|
/// consecutive binding numbers are unused.
uint32_t getNextBindingChunk(uint32_t set, uint32_t n) {
  const auto &taken = usedBindings[set];

  // |candidate| is the lowest binding number not yet ruled out. It starts at
  // zero and is moved just past each used binding as we walk them in
  // ascending order.
  uint32_t candidate = 0;
  for (const uint32_t used : taken) {
    // The free gap [candidate, used) is wide enough to hold |n| bindings.
    if (used - candidate >= n)
      return candidate;
    candidate = used + 1;
  }

  // Either nothing is used in this set (candidate is still 0), or no gap
  // between used bindings was large enough, so the chunk starts right after
  // the last used binding.
  return candidate;
}
private:
///< set number -> set of used binding number
llvm::DenseMap<uint32_t, llvm::DenseSet<uint32_t>> usedBindings;
///< set number -> next available binding number
llvm::DenseMap<uint32_t, uint32_t> nextBindings;
llvm::DenseMap<uint32_t, std::set<uint32_t>> usedBindings;
};
} // namespace
@ -1553,11 +1590,30 @@ bool DeclResultIdMapper::decorateResourceBindings() {
// Decorates the given varId of the given category with set number
// setNo, binding number bindingNo. Ignores overlaps.
const auto tryToDecorate = [this, &bindingSet](SpirvVariable *var,
const auto tryToDecorate = [this, &bindingSet](const ResourceVar &var,
const uint32_t setNo,
const uint32_t bindingNo) {
bindingSet.useBinding(bindingNo, setNo);
spvBuilder.decorateDSetBinding(var, setNo, bindingNo);
// By default we use one binding number per resource, and an array of
// resources also gets only one binding number. However, for array of
// resources (e.g. array of textures), DX uses one binding number per array
// element. We can match this behavior via a command line option.
uint32_t numBindingsToUse = 1;
if (spirvOptions.flattenResourceArrays)
numBindingsToUse = var.getArraySize();
for (uint32_t i = 0; i < numBindingsToUse; ++i) {
bool success = bindingSet.useBinding(bindingNo + i, setNo);
if (!success && spirvOptions.flattenResourceArrays) {
emitError("ran into binding number conflict when assigning binding "
"number %0 in set %1",
{})
<< bindingNo << setNo;
}
}
// No need to decorate multiple binding numbers for arrays. It will be done
// by legalization/optimization.
spvBuilder.decorateDSetBinding(var.getSpirvInstr(), setNo, bindingNo);
};
for (const auto &var : resourceVars) {
@ -1570,13 +1626,12 @@ bool DeclResultIdMapper::decorateResourceBindings() {
else if (const auto *reg = var.getRegister())
set = reg->RegisterSpace.getValueOr(defaultSpace);
tryToDecorate(var.getSpirvInstr(), set, vkCBinding->getBinding());
tryToDecorate(var, set, vkCBinding->getBinding());
}
} else {
if (const auto *vkBinding = var.getBinding()) {
// Process m1
tryToDecorate(var.getSpirvInstr(),
getVkBindingAttrSet(vkBinding, defaultSpace),
tryToDecorate(var, getVkBindingAttrSet(vkBinding, defaultSpace),
vkBinding->getBinding());
}
}
@ -1617,10 +1672,18 @@ bool DeclResultIdMapper::decorateResourceBindings() {
llvm_unreachable("unknown register type found");
}
tryToDecorate(var.getSpirvInstr(), set, binding);
tryToDecorate(var, set, binding);
}
for (const auto &var : resourceVars) {
// By default we use one binding number per resource, and an array of
// resources also gets only one binding number. However, for array of
// resources (e.g. array of textures), DX uses one binding number per array
// element. We can match this behavior via a command line option.
uint32_t numBindingsToUse = 1;
if (spirvOptions.flattenResourceArrays)
numBindingsToUse = var.getArraySize();
if (var.isCounter()) {
if (!var.getCounterBinding()) {
// Process mX * c2
@ -1630,15 +1693,17 @@ bool DeclResultIdMapper::decorateResourceBindings() {
else if (const auto *reg = var.getRegister())
set = reg->RegisterSpace.getValueOr(defaultSpace);
spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set,
bindingSet.useNextBinding(set));
spvBuilder.decorateDSetBinding(
var.getSpirvInstr(), set,
bindingSet.useNextBinding(set, numBindingsToUse));
}
} else if (!var.getBinding()) {
const auto *reg = var.getRegister();
if (reg && reg->isSpaceOnly()) {
const uint32_t set = reg->RegisterSpace.getValueOr(defaultSpace);
spvBuilder.decorateDSetBinding(var.getSpirvInstr(), set,
bindingSet.useNextBinding(set));
spvBuilder.decorateDSetBinding(
var.getSpirvInstr(), set,
bindingSet.useNextBinding(set, numBindingsToUse));
} else if (!reg) {
// Process m3 (no 'vk::binding' and no ':register' assignment)
@ -1653,7 +1718,7 @@ bool DeclResultIdMapper::decorateResourceBindings() {
else {
spvBuilder.decorateDSetBinding(
var.getSpirvInstr(), defaultSpace,
bindingSet.useNextBinding(defaultSpace));
bindingSet.useNextBinding(defaultSpace, numBindingsToUse));
}
}
}

Просмотреть файл

@ -111,12 +111,24 @@ private:
class ResourceVar {
public:
ResourceVar(SpirvVariable *var, SourceLocation loc,
ResourceVar(SpirvVariable *var, const Decl *decl, SourceLocation loc,
const hlsl::RegisterAssignment *r, const VKBindingAttr *b,
const VKCounterBindingAttr *cb, bool counter = false,
bool globalsBuffer = false)
: variable(var), srcLoc(loc), reg(r), binding(b), counterBinding(cb),
isCounterVar(counter), isGlobalsCBuffer(globalsBuffer) {}
isCounterVar(counter), isGlobalsCBuffer(globalsBuffer), arraySize(1) {
if (decl) {
if (const ValueDecl *valueDecl = dyn_cast<ValueDecl>(decl)) {
const QualType type = valueDecl->getType();
if (!type.isNull() && type->isConstantArrayType()) {
if (auto constArrayType = dyn_cast<ConstantArrayType>(type)) {
arraySize =
static_cast<uint32_t>(constArrayType->getSize().getZExtValue());
}
}
}
}
}
SpirvVariable *getSpirvInstr() const { return variable; }
SourceLocation getSourceLocation() const { return srcLoc; }
@ -127,6 +139,7 @@ public:
const VKCounterBindingAttr *getCounterBinding() const {
return counterBinding;
}
uint32_t getArraySize() const { return arraySize; }
private:
SpirvVariable *variable; ///< The variable
@ -136,6 +149,7 @@ private:
const VKCounterBindingAttr *counterBinding; ///< Vulkan counter binding
bool isCounterVar; ///< Couter variable or not
bool isGlobalsCBuffer; ///< $Globals cbuffer or not
uint32_t arraySize; ///< Size if resource is an array
};
/// A (instruction-pointer, is-alias-or-not) pair for counter variables
@ -297,6 +311,11 @@ public:
SpirvVariable *createRayTracingNVStageVar(spv::StorageClass sc,
const VarDecl *decl);
/// \brief Creates the taskNV stage variables for payload struct variable
/// and returns true on success. SPIR-V instructions will also be generated
/// to load/store the contents from/to *value. payloadMemOffset is incremented
/// based on payload struct member size, alignment and offset, and SPIR-V
/// decorations PerTaskNV and Offset are assigned to each member.
bool createPayloadStageVars(const hlsl::SigPoint *sigPoint,
spv::StorageClass sc, const NamedDecl *decl,
bool asInput, QualType type,

Просмотреть файл

@ -363,7 +363,7 @@ bool GlPerVertex::tryToAccess(hlsl::SigPoint::Kind sigPointKind,
SpirvInstruction *vecComponent,
SourceLocation loc) {
assert(value);
// invocationId should only be used for HSPCOut.
// invocationId should only be used for HSCPOut or MSOut.
assert(invocationId.hasValue()
? (sigPointKind == hlsl::SigPoint::Kind::HSCPOut ||
sigPointKind == hlsl::SigPoint::Kind::MSOut)
@ -655,7 +655,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind,
// The interesting shader stage is HS. We need the InvocationID to write
// out the value to the correct array element.
SpirvInstruction *offset = nullptr;
QualType type;
QualType type = {};
bool isClip = false;
switch (semanticKind) {
case hlsl::Semantic::Kind::ClipDistance: {
@ -686,7 +686,7 @@ bool GlPerVertex::writeField(hlsl::Semantic::Kind semanticKind,
return false;
}
if (vecComponent) {
QualType elemType;
QualType elemType = {};
if (!isVectorType(type, &elemType)) {
assert(false && "expected vector type");
}

Просмотреть файл

@ -164,7 +164,7 @@ bool spirvToolsLegalize(spv_target_env env, std::vector<uint32_t> *module,
}
bool spirvToolsOptimize(spv_target_env env, std::vector<uint32_t> *module,
const llvm::SmallVector<llvm::StringRef, 4> &flags,
clang::spirv::SpirvCodeGenOptions &spirvOptions,
std::string *messages) {
spvtools::Optimizer optimizer(env);
@ -176,14 +176,16 @@ bool spirvToolsOptimize(spv_target_env env, std::vector<uint32_t> *module,
spvtools::OptimizerOptions options;
options.set_run_validator(false);
if (flags.empty()) {
if (spirvOptions.optConfig.empty()) {
optimizer.RegisterPerformancePasses();
if (spirvOptions.flattenResourceArrays)
optimizer.RegisterPass(spvtools::CreateDescriptorScalarReplacementPass());
optimizer.RegisterPass(spvtools::CreateCompactIdsPass());
} else {
// Command line options use llvm::SmallVector and llvm::StringRef, whereas
// SPIR-V optimizer uses std::vector and std::string.
std::vector<std::string> stdFlags;
for (const auto &f : flags)
for (const auto &f : spirvOptions.optConfig)
stdFlags.push_back(f.str());
if (!optimizer.RegisterPassesFromFlags(stdFlags))
return false;
@ -662,8 +664,7 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) {
// Run optimization passes
if (theCompilerInstance.getCodeGenOpts().OptimizationLevel > 0) {
std::string messages;
if (!spirvToolsOptimize(targetEnv, &m, spirvOptions.optConfig,
&messages)) {
if (!spirvToolsOptimize(targetEnv, &m, spirvOptions, &messages)) {
emitFatalError("failed to optimize SPIR-V: %0", {}) << messages;
emitNote("please file a bug report on "
"https://github.com/Microsoft/DirectXShaderCompiler/issues "

Просмотреть файл

@ -309,8 +309,10 @@ private:
SpirvInstruction *initValue,
SourceLocation loc);
/// Collects all indices from consecutive MemberExprs
/// TODO: Update method description here.
/// Collects all indices from consecutive MemberExprs, ArraySubscriptExprs and
/// CXXOperatorCallExprs. Also special-cases all mesh shader out attributes by
/// returning the entire expression, so that the caller can extract the member
/// expression.
const Expr *
collectArrayStructIndices(const Expr *expr, bool rawIndex,
llvm::SmallVectorImpl<uint32_t> *rawIndices,

Просмотреть файл

@ -0,0 +1,18 @@
// RUN: %dxc -E main -T ps_6_2 %s | FileCheck %s
// Make sure frexp generates the expected code pattern.
// CHECK:bitcast float {{.*}} to i32
// CHECK:and i32 {{.*}}, 2139095040
// CHECK:add {{.*}}, -1056964608
// CHECK:ashr {{.*}}, 23
// CHECK:sitofp
// CHECK:and i32 {{.*}}, 8388607
// CHECK:or i32 {{.*}}, 1056964608
// CHECK:fadd
// Entry point: calls frexp on the input and returns the sum of its return
// value (c) and its out-parameter (b), so both results of the intrinsic are
// used by the shader output.
float main(float a:A) : SV_Target {
float b;
float c = frexp ( a , b );
return b+c;
}

Просмотреть файл

@ -0,0 +1,17 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Regression test for a validation error, where parameter SROA
// would generate GEPs before the indices it uses
// CHECK: @main
Texture2D tex0[10] : register(t0);
// Helper taking an unsized array of textures as a parameter; loads from the
// element selected by idx. The array parameter indexed by another parameter
// is the shape this parameter-SROA regression test exercises.
float4 f(Texture2D textures[], unsigned int idx) {
return textures[idx].Load(0);
}
// Entry point: passes the 10-element tex0 array to f with constant index 1.
// The root signature declares the 10 SRV descriptors starting at t0.
[ RootSignature("DescriptorTable(SRV(t0, numDescriptors=10))") ]
float4 main() : SV_Target {
return f(tex0, 1);
}

Просмотреть файл

@ -0,0 +1,202 @@
// Run: %dxc -T ms_6_5 -E main
// CHECK: OpCapability MeshShadingNV
// CHECK: OpExtension "SPV_NV_mesh_shader"
// CHECK: OpEntryPoint MeshNV %main "main"
// CHECK: OpName %UserVertex "UserVertex"
// Element type of the userVertices RWStructuredBuffer declared further down.
struct UserVertex {
// CHECK: OpMemberName %UserVertex 0 "position"
// CHECK: OpMemberName %UserVertex 1 "texcoord"
// CHECK: OpMemberName %UserVertex 2 "color"
float3 position;
float2 texcoord;
float3 color;
};
// CHECK: OpName %Mesh "Mesh"
// Element type of the meshes RWStructuredBuffer declared further down.
struct Mesh {
// CHECK: OpMemberName %Mesh 0 "firstSubmesh"
// CHECK: OpMemberName %Mesh 1 "submeshCount"
// CHECK: OpMemberName %Mesh 2 "dummy"
uint firstSubmesh;
uint submeshCount;
uint dummy[2];
};
// CHECK: OpName %SubMesh "SubMesh"
// Element type of the submeshes RWStructuredBuffer declared further down;
// main() copies one element into a function-local variable and reads its
// count/offset members from that copy.
struct SubMesh {
// CHECK: OpMemberName %SubMesh 0 "vertexCount"
// CHECK: OpMemberName %SubMesh 1 "vertexOffset"
// CHECK: OpMemberName %SubMesh 2 "primitiveCount"
// CHECK: OpMemberName %SubMesh 3 "indexOffset"
// CHECK: OpMemberName %SubMesh 4 "boundingBox"
uint vertexCount;
uint vertexOffset;
uint primitiveCount;
uint indexOffset;
float4 boundingBox[8];
};
// CHECK: OpDecorate %userVertices DescriptorSet 0
// CHECK: OpDecorate %userVertices Binding 0
// CHECK: OpDecorate %userIndices DescriptorSet 0
// CHECK: OpDecorate %userIndices Binding 1
// CHECK: OpDecorate %meshes DescriptorSet 0
// CHECK: OpDecorate %meshes Binding 2
// CHECK: OpDecorate %submeshes DescriptorSet 0
// CHECK: OpDecorate %submeshes Binding 3
// CHECK: OpDecorate %UBO DescriptorSet 0
// CHECK: OpDecorate %UBO Binding 4
// CHECK: OpMemberDecorate %UserVertex 0 Offset 0
// CHECK: OpMemberDecorate %UserVertex 1 Offset 16
// CHECK: OpMemberDecorate %UserVertex 2 Offset 32
// CHECK: OpDecorate %_runtimearr_UserVertex ArrayStride 48
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_UserVertex 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_UserVertex BufferBlock
// CHECK: OpDecorate %_runtimearr_uint ArrayStride 4
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_uint BufferBlock
// CHECK: OpMemberDecorate %Mesh 0 Offset 0
// CHECK: OpMemberDecorate %Mesh 1 Offset 4
// CHECK: OpMemberDecorate %Mesh 2 Offset 8
// CHECK: OpDecorate %_runtimearr_Mesh ArrayStride 16
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_Mesh 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_Mesh BufferBlock
// CHECK: OpMemberDecorate %SubMesh 0 Offset 0
// CHECK: OpMemberDecorate %SubMesh 1 Offset 4
// CHECK: OpMemberDecorate %SubMesh 2 Offset 8
// CHECK: OpMemberDecorate %SubMesh 3 Offset 12
// CHECK: OpMemberDecorate %SubMesh 4 Offset 16
// CHECK: OpDecorate %_runtimearr_SubMesh ArrayStride 144
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_SubMesh 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_SubMesh BufferBlock
// CHECK: OpMemberDecorate %type_UBO 0 Offset 0
// CHECK: OpMemberDecorate %type_UBO 0 MatrixStride 16
// CHECK: OpMemberDecorate %type_UBO 0 ColMajor
// CHECK: OpDecorate %type_UBO Block
// CHECK: %UserVertex = OpTypeStruct %v3float %v2float %v3float
// CHECK: %_runtimearr_UserVertex = OpTypeRuntimeArray %UserVertex
// CHECK: %type_RWStructuredBuffer_UserVertex = OpTypeStruct %_runtimearr_UserVertex
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_UserVertex = OpTypePointer Uniform %type_RWStructuredBuffer_UserVertex
[[vk::binding(0, 0)]]
RWStructuredBuffer<UserVertex> userVertices;
// CHECK: %_runtimearr_uint = OpTypeRuntimeArray %uint
// CHECK: %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_uint = OpTypePointer Uniform %type_RWStructuredBuffer_uint
[[vk::binding(1, 0)]]
RWStructuredBuffer<uint> userIndices;
// CHECK: %_arr_uint_uint_2 = OpTypeArray %uint %uint_2
// CHECK: %Mesh = OpTypeStruct %uint %uint %_arr_uint_uint_2
// CHECK: %_runtimearr_Mesh = OpTypeRuntimeArray %Mesh
// CHECK: %type_RWStructuredBuffer_Mesh = OpTypeStruct %_runtimearr_Mesh
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_Mesh = OpTypePointer Uniform %type_RWStructuredBuffer_Mesh
[[vk::binding(2, 0)]]
RWStructuredBuffer<Mesh> meshes;
// CHECK: %uint_8 = OpConstant %uint 8
// CHECK: %v4float = OpTypeVector %float 4
// CHECK: %_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
// CHECK: %SubMesh = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8
// CHECK: %_runtimearr_SubMesh = OpTypeRuntimeArray %SubMesh
// CHECK: %type_RWStructuredBuffer_SubMesh = OpTypeStruct %_runtimearr_SubMesh
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_SubMesh = OpTypePointer Uniform %type_RWStructuredBuffer_SubMesh
[[vk::binding(3, 0)]]
RWStructuredBuffer<SubMesh> submeshes;
// CHECK: %mat4v4float = OpTypeMatrix %v4float 4
// CHECK: %type_UBO = OpTypeStruct %mat4v4float
// CHECK: %_ptr_Uniform_type_UBO = OpTypePointer Uniform %type_UBO
[[vk::binding(4, 0)]]
cbuffer UBO {
row_major float4x4 mvp;
}
// Per-vertex output record; used as the element type of the `out vertices`
// array parameter of main().
struct PerVertex {
float4 position : SV_Position;
float2 texcoord : TEXCOORD;
float3 color : COLOR;
};
// Per-primitive output record; used as the element type of the
// `out primitives` array parameter of main().
struct PerPrimitive {
float4 primcolor : PCOLOR;
};
// Type of the `in payload` parameter of main(); main() indexes submeshID by
// group id to pick the submesh to process.
struct SubMeshes {
uint submeshID[256] : SUBMESH;
};
static const uint vertsPerPrim = 3U;
// CHECK: %userVertices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_UserVertex Uniform
// CHECK: %userIndices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_uint Uniform
// CHECK: %meshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_Mesh Uniform
// CHECK: %submeshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_SubMesh Uniform
// CHECK: %UBO = OpVariable %_ptr_Uniform_type_UBO Uniform
// Mesh shader entry point. Selects a submesh through the payload, copies its
// record out of the submeshes buffer, then fills the vertex, primitive-index
// and per-primitive outputs from the user buffers.
[outputtopology("triangle")]
[numthreads(32, 1, 1)]
void main(
out indices uint3 primIndices[128],
out vertices PerVertex verts[128],
out primitives PerPrimitive prims[128],
in payload SubMeshes taskmem,
in uint gid : SV_GroupID,
in uint tid : SV_GroupThreadID
)
{
// The payload maps this group's id to an index into the submeshes buffer.
uint task = taskmem.submeshID[gid];
// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
// CHECK: OpAccessChain %_ptr_Uniform_SubMesh %submeshes %int_0 [[task:%\d+]]
// CHECK: OpStore %submesh [[submeshVal:%\d+]]
SubMesh submesh = submeshes[task];
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_0
uint numPackedVertices = submesh.vertexCount;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_2
uint numPackedPrimitives = submesh.primitiveCount;
SetMeshOutputCounts(numPackedVertices, numPackedPrimitives);
// Vertex pass: the stride of 32 matches numthreads(32, 1, 1), so thread tid
// handles vertices tid, tid + 32, tid + 64, ...
for (uint i = 0U; i < numPackedVertices; i += 32U) {
uint vid = i + tid;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1
uint svid = vid + submesh.vertexOffset;
// Threads whose index falls past the vertex count skip this iteration.
if (vid >= numPackedVertices) continue;
// CHECK: OpAccessChain %_ptr_Uniform_v2float %userVertices %int_0 [[svid_1:%\d+]] %int_1
verts[vid].texcoord = userVertices[svid].texcoord;
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_2:%\d+]] %int_2
verts[vid].color = userVertices[svid].color;
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_0:%\d+]] %int_0
float3 position = userVertices[svid].position;
// CHECK: OpAccessChain %_ptr_Uniform_mat4v4float %UBO %int_0
verts[vid].position = mul(mvp, float4(position, 1.0));
}
GroupMemoryBarrier();
// Primitive pass: same 32-wide striding as the vertex pass.
for (uint j = 0U; j < numPackedPrimitives; j += 32U) {
uint pid = j + tid;
// Offset of this primitive's first index (vertsPerPrim indices each).
uint didxoff = vertsPerPrim * pid;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_3
uint sidxoff = submesh.indexOffset + didxoff;
if (pid >= numPackedPrimitives) continue;
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_0:%\d+]]
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_1:%\d+]]
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_2:%\d+]]
primIndices[pid] = uint3(userIndices[sidxoff], userIndices[sidxoff+1], userIndices[sidxoff+2]);
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[ind:%\d+]]
// The primitive color is taken from the vertex referenced by the last of the
// primitive's indices.
uint providx = submesh.vertexOffset + userIndices[sidxoff + vertsPerPrim - 1U];
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[providx:%\d+]] %int_2
prims[pid].primcolor = float4(userVertices[providx].color, 1.0);
}
}

Просмотреть файл

@ -0,0 +1,135 @@
// Run: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.1
// CHECK: OpCapability MeshShadingNV
// CHECK: OpExtension "SPV_NV_mesh_shader"
// CHECK: OpEntryPoint TaskNV %main "main"
struct SubMesh {
uint vertexCount;
uint vertexOffset;
uint primitiveCount;
uint indexOffset;
float4 boundingBox[8];
};
struct Mesh {
uint firstSubmesh;
uint submeshCount;
uint dummy[2];
};
struct UserVertex {
float3 position;
float2 texcoord;
float3 color;
};
[[vk::binding(0, 0)]]
RWStructuredBuffer<UserVertex> userVertices;
[[vk::binding(1, 0)]]
RWStructuredBuffer<uint> userIndices;
[[vk::binding(2, 0)]]
RWStructuredBuffer<Mesh> meshes;
[[vk::binding(3, 0)]]
RWStructuredBuffer<SubMesh> submeshes;
[[vk::binding(4, 0)]]
cbuffer UBO {
row_major float4x4 mvp;
}
groupshared uint passedSubmeshes;
struct SubMeshes {
uint submeshID[256] : SUBMESH;
};
groupshared SubMeshes sharedSubMeshes;
// CHECK: %_arr_v4float_uint_8_0 = OpTypeArray %v4float %uint_8
// CHECK: %SubMesh_0 = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8_0
// CHECK: %_ptr_Function_SubMesh_0 = OpTypePointer Function %SubMesh_0
// CHECK: [[funcType:%\d+]] = OpTypeFunction %bool %_ptr_Function_SubMesh_0
// Transforms each of the 8 bounding-box corners by mvp and sets one bit of
// `clip` per comparison that holds for that corner. Returns true only when
// all six bits (0x3F) have been set, i.e. every comparison held for at least
// one corner. The struct is taken by value, which is what the function-call
// directives in main() exercise (param_var_submesh copy).
bool TestSubmesh(SubMesh submesh) {
uint clip = 0x0U;
for (uint bbv = 0U ; bbv < 8U; bbv++) {
float4 pos= mul(mvp, submesh.boundingBox[bbv]);
if (pos.x <= pos.w) clip |= 0x1U;
if (pos.y <= 0.3333 * pos.w) clip |= 0x2U;
if (pos.z <= pos.w) clip |= 0x4U;
if (pos.x >= -pos.w) clip |= 0x8U;
if (pos.y >= -pos.w) clip |= 0x10U;
if (pos.z >= -pos.w) clip |= 0x20U;
}
return (clip == 0x3FU);
}
// Amplification shader entry point. Each group handles one Mesh record
// (indexed by group id), tests its submeshes with TestSubmesh, collects the
// ids of the passing ones in groupshared memory and dispatches one mesh
// group per collected submesh.
[numthreads(32, 1, 1)]
void main(
in uint tid : SV_GroupThreadID,
in uint mid : SV_GroupID
)
{
uint firstSubmesh = meshes[mid].firstSubmesh;
uint submeshCount = meshes[mid].submeshCount;
passedSubmeshes = 0U;
GroupMemoryBarrier();
// The stride of 32 matches numthreads(32, 1, 1): thread tid tests submeshes
// firstSubmesh + tid, firstSubmesh + tid + 32, ...
for (uint i = 0U; i < submeshCount; i += 32U) {
uint smid = firstSubmesh + i + tid;
if (smid >= firstSubmesh + submeshCount) continue;
// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
// CHECK: %passed = OpVariable %_ptr_Function_bool Function
// CHECK: %param_var_submesh = OpVariable %_ptr_Function_SubMesh_0 Function
SubMesh submesh = submeshes[smid];
bool passed = true;
// CHECK: [[submeshValue:%\d+]] = OpLoad %SubMesh_0 %submesh
// CHECK: OpStore %param_var_submesh [[submeshValue]]
// CHECK: [[rv:%\d+]] = OpFunctionCall %bool %TestSubmesh %param_var_submesh
// CHECK: [[cond:%\d+]] = OpLogicalNot %bool [[rv]]
// CHECK: OpSelectionMerge %if_merge_0 None
// CHECK: OpBranchConditional [[cond]] %if_true_0 %if_merge_0
// CHECK: %if_true_0 = OpLabel
// CHECK: OpStore %passed %false
// CHECK: OpBranch %if_merge_0
// CHECK: %if_merge_0 = OpLabel
if (!TestSubmesh(submesh)) passed = false;
if (passed) {
uint ballot = WaveActiveBallot(passed).x;
// Mask selecting the ballot bits of all lanes with a lower index than this one.
uint laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
uint lowerThreads = ballot & laneMaskLT;
// Each passing lane writes its submesh id to its own slot past the current
// shared count.
uint slot = passedSubmeshes + WavePrefixCountBits(passed);
sharedSubMeshes.submeshID[slot] = smid;
// Only the lane with no passing lower-indexed lane bumps the shared counter.
if (lowerThreads == 0U) {
passedSubmeshes += WaveActiveCountBits(passed);
}
}
GroupMemoryBarrier();
}
DispatchMesh(passedSubmeshes, 1, 1, sharedSubMeshes);
}
/* bool TestSubmesh(SubMesh submesh) { ... } */
// CHECK: %TestSubmesh = OpFunction %bool None [[funcType]]
// CHECK: %submesh_0 = OpFunctionParameter %_ptr_Function_SubMesh_0
// CHECK: %bb_entry_0 = OpLabel
// CHECK: %clip = OpVariable %_ptr_Function_uint Function
// CHECK: %bbv = OpVariable %_ptr_Function_uint Function
// CHECK: %pos = OpVariable %_ptr_Function_v4float Function
// CHECK: %for_check_0 = OpLabel
// CHECK: %for_body_0 = OpLabel
// CHECK: %for_merge_0 = OpLabel
// CHECK: [[clipValue:%\d+]] = OpLoad %uint %clip
// CHECK: [[retValue:%\d+]] = OpIEqual %bool [[clipValue]] %uint_63
// CHECK: OpReturnValue [[retValue]]
// CHECK: OpFunctionEnd

Просмотреть файл

@ -0,0 +1,19 @@
// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
// Negative test: with resource-array flattening enabled, MyTextures[5] at t0
// claims binding numbers 0-4, so a second resource forced to t3 must be
// reported as a binding number conflict.
// CHECK: error: ran into binding number conflict when assigning binding number 3 in set 0
Texture2D MyTextures[5] : register(t0); // Forced use of binding numbers 0, 1, 2, 3, 4.
Texture2D AnotherTexture : register(t3); // Error: Forced use of binding number 3.
SamplerState MySampler;
float4 main(float2 TexCoord : TexCoord) : SV_Target0 {
float4 result =
MyTextures[0].Sample(MySampler, TexCoord) +
MyTextures[1].Sample(MySampler, TexCoord) +
MyTextures[2].Sample(MySampler, TexCoord) +
MyTextures[3].Sample(MySampler, TexCoord) +
MyTextures[4].Sample(MySampler, TexCoord) +
AnotherTexture.Sample(MySampler, TexCoord);
return result;
}

Просмотреть файл

@ -0,0 +1,36 @@
// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3
// With flattening and -O3, every element of MyTextures and MySamplers is
// expected to end up as its own variable with its own binding number
// (0-4 for the textures, 7-8 for the samplers). The ids captured in the
// decoration lines are reused in the variable lines below.
// CHECK: OpDecorate %AnotherTexture Binding 5
// CHECK: OpDecorate %NextTexture Binding 6
// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 0
// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 1
// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 2
// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 3
// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 4
// CHECK: OpDecorate [[MySamplers0:%\d+]] Binding 7
// CHECK: OpDecorate [[MySamplers1:%\d+]] Binding 8
// CHECK: [[MyTextures0]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures1]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures2]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures3]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures4]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MySamplers0]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
// CHECK: [[MySamplers1]] = OpVariable %_ptr_UniformConstant_type_sampler UniformConstant
Texture2D MyTextures[5] : register(t0);
Texture2D NextTexture; // This is supposed to be t6.
Texture2D AnotherTexture : register(t5);
SamplerState MySamplers[2];
float4 main(float2 TexCoord : TexCoord) : SV_Target0
{
float4 result =
MyTextures[0].Sample(MySamplers[0], TexCoord) +
MyTextures[1].Sample(MySamplers[0], TexCoord) +
MyTextures[2].Sample(MySamplers[0], TexCoord) +
MyTextures[3].Sample(MySamplers[1], TexCoord) +
MyTextures[4].Sample(MySamplers[1], TexCoord) +
AnotherTexture.Sample(MySamplers[1], TexCoord) +
NextTexture.Sample(MySamplers[1], TexCoord);
return result;
}

Просмотреть файл

@ -0,0 +1,23 @@
// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
// Without optimization the arrays are still single variables; the expected
// decorations show each array receiving the first binding number of the
// range it occupies (MyTextures at 0, MySamplers at 7), with the resources
// in between numbered around them.
// CHECK: OpDecorate %MyTextures Binding 0
// CHECK: OpDecorate %AnotherTexture Binding 5
// CHECK: OpDecorate %NextTexture Binding 6
// CHECK: OpDecorate %MySamplers Binding 7
Texture2D MyTextures[5] : register(t0);
Texture2D NextTexture; // This is supposed to be t6.
Texture2D AnotherTexture : register(t5);
SamplerState MySamplers[2];
float4 main(float2 TexCoord : TexCoord) : SV_Target0
{
float4 result =
MyTextures[0].Sample(MySamplers[0], TexCoord) +
MyTextures[1].Sample(MySamplers[0], TexCoord) +
MyTextures[2].Sample(MySamplers[0], TexCoord) +
MyTextures[3].Sample(MySamplers[1], TexCoord) +
MyTextures[4].Sample(MySamplers[1], TexCoord) +
AnotherTexture.Sample(MySamplers[1], TexCoord) +
NextTexture.Sample(MySamplers[1], TexCoord);
return result;
}

Просмотреть файл

@ -0,0 +1,41 @@
// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays -O3
// Arrays without an explicit register must be placed around the forced
// binding of AnotherTexture (3). With -O3 every array element is expected to
// become its own variable. Each id captured in a decoration line is
// referenced (not re-captured) in the matching variable line, so the binding
// numbers are tied to the flattened image variables.
// CHECK: OpDecorate %AnotherTexture Binding 3
// CHECK: OpDecorate %MySampler Binding 2
// CHECK: OpDecorate %MySampler2 Binding 9
// CHECK: OpDecorate [[MyTextures0:%\d+]] Binding 4
// CHECK: OpDecorate [[MyTextures1:%\d+]] Binding 5
// CHECK: OpDecorate [[MyTextures2:%\d+]] Binding 6
// CHECK: OpDecorate [[MyTextures3:%\d+]] Binding 7
// CHECK: OpDecorate [[MyTextures4:%\d+]] Binding 8
// CHECK: OpDecorate [[MyTextures20:%\d+]] Binding 0
// CHECK: OpDecorate [[MyTextures21:%\d+]] Binding 1
// CHECK: [[MyTextures0]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures1]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures2]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures3]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures4]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures20]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
// CHECK: [[MyTextures21]] = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant
Texture2D MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8].
Texture2D AnotherTexture : register(t3); // force binding number 3.
Texture2D MyTextures2[2]; // take binding slot 0 and 1.
SamplerState MySampler; // take binding slot 2.
SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9.
float4 main(float2 TexCoord : TexCoord) : SV_Target0
{
float4 result =
MyTextures[0].Sample(MySampler, TexCoord) +
MyTextures[1].Sample(MySampler, TexCoord) +
MyTextures[2].Sample(MySampler, TexCoord) +
MyTextures[3].Sample(MySampler, TexCoord) +
MyTextures[4].Sample(MySampler, TexCoord) +
MyTextures2[0].Sample(MySampler2, TexCoord) +
MyTextures2[1].Sample(MySampler2, TexCoord) +
AnotherTexture.Sample(MySampler, TexCoord);
return result;
}

Просмотреть файл

@ -0,0 +1,29 @@
// Run: %dxc -T ps_6_0 -E main -fspv-flatten-resource-arrays
// Unoptimized counterpart: the arrays remain single variables, and each is
// expected to be decorated with the first binding number of the range it
// occupies once the forced binding of AnotherTexture (3) is respected.
// CHECK: OpDecorate %AnotherTexture Binding 3
// CHECK: OpDecorate %MyTextures Binding 4
// CHECK: OpDecorate %MyTextures2 Binding 0
// CHECK: OpDecorate %MySampler Binding 2
// CHECK: OpDecorate %MySampler2 Binding 9
Texture2D MyTextures[5]; // five array elements cannot fit in [0-2] binding slots, so it should take slot [4-8].
Texture2D AnotherTexture : register(t3); // force binding number 3.
Texture2D MyTextures2[2]; // take binding slot 0 and 1.
SamplerState MySampler; // take binding slot 2.
SamplerState MySampler2; // binding 0 to 8 are taken. The next available binding is 9.
float4 main(float2 TexCoord : TexCoord) : SV_Target0
{
float4 result =
MyTextures[0].Sample(MySampler, TexCoord) +
MyTextures[1].Sample(MySampler, TexCoord) +
MyTextures[2].Sample(MySampler, TexCoord) +
MyTextures[3].Sample(MySampler, TexCoord) +
MyTextures[4].Sample(MySampler, TexCoord) +
MyTextures2[0].Sample(MySampler2, TexCoord) +
MyTextures2[1].Sample(MySampler2, TexCoord) +
AnotherTexture.Sample(MySampler, TexCoord);
return result;
}

Просмотреть файл

@ -62,6 +62,10 @@ public:
TEST_METHOD(Precise6)
TEST_METHOD(Precise7)
TEST_METHOD(CSGetNumThreads)
TEST_METHOD(MSGetNumThreads)
TEST_METHOD(ASGetNumThreads)
TEST_METHOD(SetValidatorVersion)
void VerifyValidatorVersionFails(
@ -435,6 +439,64 @@ TEST_F(DxilModuleTest, Precise7) {
VERIFY_ARE_EQUAL(numChecks, 4);
}
// Compiles a minimal compute shader declared with [numthreads(8, 4, 2)] and
// verifies that the resulting DxilModule reports those three dimensions.
TEST_F(DxilModuleTest, CSGetNumThreads) {
  Compiler compiler(m_dllSupport);
  compiler.Compile("[numthreads(8, 4, 2)]\n"
                   "void main() {\n"
                   "}\n",
                   L"cs_6_0");

  DxilModule &dxilModule = compiler.GetDxilModule();
  const auto threadsX = dxilModule.GetNumThreads(0);
  const auto threadsY = dxilModule.GetNumThreads(1);
  const auto threadsZ = dxilModule.GetNumThreads(2);
  VERIFY_ARE_EQUAL(8, threadsX);
  VERIFY_ARE_EQUAL(4, threadsY);
  VERIFY_ARE_EQUAL(2, threadsZ);
}
// Compiles a minimal ms_6_5 mesh shader declared with [numthreads(8, 4, 2)]
// and verifies that the resulting DxilModule reports those three dimensions.
// Returns early when SkipDxil_Test(1, 5) says the test should be skipped.
TEST_F(DxilModuleTest, MSGetNumThreads) {
  Compiler compiler(m_dllSupport);
  if (compiler.SkipDxil_Test(1, 5)) {
    return;
  }

  compiler.Compile("struct MeshPerVertex { float4 pos : SV_Position; };\n"
                   "[numthreads(8, 4, 2)]\n"
                   "[outputtopology(\"triangle\")]\n"
                   "void main(\n"
                   " out indices uint3 primIndices[1]\n"
                   ") {\n"
                   " SetMeshOutputCounts(0, 0);\n"
                   "}\n",
                   L"ms_6_5");

  DxilModule &dxilModule = compiler.GetDxilModule();
  const auto threadsX = dxilModule.GetNumThreads(0);
  const auto threadsY = dxilModule.GetNumThreads(1);
  const auto threadsZ = dxilModule.GetNumThreads(2);
  VERIFY_ARE_EQUAL(8, threadsX);
  VERIFY_ARE_EQUAL(4, threadsY);
  VERIFY_ARE_EQUAL(2, threadsZ);
}
// Compiles a minimal as_6_5 amplification shader declared with
// [numthreads(8, 4, 2)] and verifies that the resulting DxilModule reports
// those three dimensions. Returns early when SkipDxil_Test(1, 5) says the
// test should be skipped.
TEST_F(DxilModuleTest, ASGetNumThreads) {
  Compiler compiler(m_dllSupport);
  if (compiler.SkipDxil_Test(1, 5)) {
    return;
  }

  compiler.Compile("struct Payload { uint i; };\n"
                   "[numthreads(8, 4, 2)]\n"
                   "void main() {\n"
                   " Payload pld = {0};\n"
                   " DispatchMesh(1, 1, 1, pld);\n"
                   "}\n",
                   L"as_6_5");

  DxilModule &dxilModule = compiler.GetDxilModule();
  const auto threadsX = dxilModule.GetNumThreads(0);
  const auto threadsY = dxilModule.GetNumThreads(1);
  const auto threadsZ = dxilModule.GetNumThreads(2);
  VERIFY_ARE_EQUAL(8, threadsX);
  VERIFY_ARE_EQUAL(4, threadsY);
  VERIFY_ARE_EQUAL(2, threadsZ);
}
void DxilModuleTest::VerifyValidatorVersionFails(
LPCWSTR shaderModel, const std::vector<LPCWSTR> &arguments,
const std::vector<LPCSTR> &expectedErrors) {

Просмотреть файл

@ -1620,6 +1620,23 @@ TEST_F(FileTest, VulkanRegisterBinding1to1MappingAssociatedCounter) {
runFileTest("vk.binding.cl.register.counter.hlsl", Expect::Failure);
}
// For flattening array of resources
// Each test below runs one vk.binding.cl.flatten-arrays.*.hlsl input. The
// "-optimized" inputs are the variants of example1/example2 that also pass
// an optimization flag on their command line.
TEST_F(FileTest, FlattenResourceArrayBindings1) {
runFileTest("vk.binding.cl.flatten-arrays.example1.hlsl");
}
TEST_F(FileTest, FlattenResourceArrayBindings1Optimized) {
runFileTest("vk.binding.cl.flatten-arrays.example1-optimized.hlsl");
}
TEST_F(FileTest, FlattenResourceArrayBindings2) {
runFileTest("vk.binding.cl.flatten-arrays.example2.hlsl");
}
TEST_F(FileTest, FlattenResourceArrayBindings2Optimized) {
runFileTest("vk.binding.cl.flatten-arrays.example2-optimized.hlsl");
}
// Overlapping forced bindings: this input is run with Expect::Failure.
TEST_F(FileTest, FlattenResourceArrayBindingsOverlapError) {
runFileTest("vk.binding.cl.flatten-arrays.error.hlsl", Expect::Failure);
}
// For testing the "-auto-binding-space" command line option which specifies the
// "default space" for resources.
TEST_F(FileTest, VulkanRegisterBindingDefaultSpaceImplicit) {
@ -2044,6 +2061,9 @@ TEST_F(FileTest, MeshShadingNVMeshLine) {
TEST_F(FileTest, MeshShadingNVMeshPoint) {
runFileTest("meshshading.nv.point.mesh.hlsl");
}
// Runs the mesh shader input named meshshading.nv.buffer.mesh.hlsl
// (presumably the mesh shader that reads from buffer resources -- confirm
// against the test data directory).
TEST_F(FileTest, MeshShadingNVMeshBuffer) {
runFileTest("meshshading.nv.buffer.mesh.hlsl");
}
// First of the mesh shader inputs that are run with Expect::Failure.
TEST_F(FileTest, MeshShadingNVMeshError1) {
runFileTest("meshshading.nv.error1.mesh.hlsl", Expect::Failure);
}
@ -2089,6 +2109,10 @@ TEST_F(FileTest, MeshShadingNVMeshError14) {
TEST_F(FileTest, MeshShadingNVAmplification) {
runFileTest("meshshading.nv.amplification.hlsl");
}
// Amplification shader that calls a user function. useVulkan1p1() is invoked
// before running the file (presumably to match a vulkan1.1 target
// environment requested by the input -- confirm against the .hlsl file).
TEST_F(FileTest, MeshShadingNVAmplificationFunCall) {
useVulkan1p1();
runFileTest("meshshading.nv.fncall.amplification.hlsl");
}
// First of the amplification shader inputs that are run with Expect::Failure.
TEST_F(FileTest, MeshShadingNVAmplificationError1) {
runFileTest("meshshading.nv.error1.amplification.hlsl", Expect::Failure);
}