Group Signature Elements by Element Width (#610)

This change enforces a new constraint on signature packing: signature elements are packed by data width. Before the fp16 type was introduced, every element was assumed to reserve 32 bits. Since we are introducing a new 16-bit data type, we need a new way to enforce signature rules.

After discussion, we decided to pack elements based on data width. However, we still enforce the rule that each row contains up to 4 elements, regardless of element size. This way, drivers can optimize signature packing depending on hardware support, while at the DXIL level we maintain the assumption that there are 4 elements per row. We also still constrain the total number of rows to 32 for now. This can be changed in the future if people find this limit to be an issue.
This commit is contained in:
Young Kim 2017-09-06 11:04:33 -07:00 коммит произвёл GitHub
Родитель 1590f7e953
Коммит 223a885613
20 изменённых файлов: 281 добавлений и 89 удалений

Просмотреть файл

@ -2894,6 +2894,7 @@ META.SEMANTICLEN Semantic length must be at least 1 and at
META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location
META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1
META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type
META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row.
META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value
META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together
META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size

Просмотреть файл

@ -34,6 +34,7 @@ public:
bool operator==(const CompType &o) const;
Kind GetKind() const;
uint8_t GetSizeInBits() const;
static CompType getInvalid();
static CompType getF16();

Просмотреть файл

@ -100,6 +100,13 @@ namespace DXIL {
Invalid = 8
};
// Width, in bits, of each scalar component of a signature element.
// The enumerator values are the bit counts themselves.
enum class SignatureDataWidth : uint8_t {
// No width has been assigned.
Undefined = 0,
// 16-bit scalar components.
Bits16 = 16,
// 32-bit scalar components.
Bits32 = 32,
};
enum class SignatureKind {
Invalid = 0,
Input,

Просмотреть файл

@ -24,7 +24,7 @@ class DxilSignature {
public:
using Kind = DXIL::SignatureKind;
DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind);
DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind, bool useMinPrecision);
DxilSignature(DXIL::SigPointKind sigPointKind);
DxilSignature(const DxilSignature &src);
virtual ~DxilSignature();
@ -49,16 +49,19 @@ public:
// Returns the number of allocated vectors used to contain signature
unsigned NumVectorsUsed(unsigned streamIndex = 0) const;
bool UseMinPrecision() const { return m_UseMinPrecision; }
private:
DXIL::SigPointKind m_sigPointKind;
std::vector<std::unique_ptr<DxilSignatureElement> > m_Elements;
bool m_UseMinPrecision;
};
struct DxilEntrySignature {
DxilEntrySignature(DXIL::ShaderKind shaderKind)
: InputSignature(shaderKind, DxilSignature::Kind::Input),
OutputSignature(shaderKind, DxilSignature::Kind::Output),
PatchConstantSignature(shaderKind, DxilSignature::Kind::PatchConstant) {
DxilEntrySignature(DXIL::ShaderKind shaderKind, bool useMinPrecision)
: InputSignature(shaderKind, DxilSignature::Kind::Input, useMinPrecision),
OutputSignature(shaderKind, DxilSignature::Kind::Output, useMinPrecision),
PatchConstantSignature(shaderKind, DxilSignature::Kind::PatchConstant, useMinPrecision) {
}
DxilEntrySignature(const DxilEntrySignature &src);
DxilSignature InputSignature;

Просмотреть файл

@ -25,6 +25,7 @@ public:
virtual DXIL::SemanticKind GetKind() const = 0;
virtual DXIL::InterpolationMode GetInterpolationMode() const = 0;
virtual DXIL::SemanticInterpretationKind GetInterpretation() const = 0;
virtual DXIL::SignatureDataWidth GetDataBitWidth() const = 0;
virtual uint32_t GetRows() const = 0;
virtual uint32_t GetCols() const = 0;
virtual bool IsAllocated() const = 0;
@ -42,6 +43,7 @@ public:
DXIL::SemanticKind kind;
DXIL::InterpolationMode interpolation;
DXIL::SemanticInterpretationKind interpretation;
DXIL::SignatureDataWidth dataBitWidth;
uint32_t indexFlags;
public:
@ -49,6 +51,7 @@ public:
kind(DXIL::SemanticKind::Arbitrary),
interpolation(DXIL::InterpolationMode::Undefined),
interpretation(DXIL::SemanticInterpretationKind::Arb),
dataBitWidth(DXIL::SignatureDataWidth::Undefined),
indexFlags(0)
{}
__override ~DummyElement() {}
@ -56,6 +59,7 @@ public:
__override DXIL::SemanticKind GetKind() const { return kind; }
__override DXIL::InterpolationMode GetInterpolationMode() const { return interpolation; }
__override DXIL::SemanticInterpretationKind GetInterpretation() const { return interpretation; }
__override DXIL::SignatureDataWidth GetDataBitWidth() const { return dataBitWidth; }
__override uint32_t GetRows() const { return rows; }
__override uint32_t GetCols() const { return cols; }
__override bool IsAllocated() const { return row != (uint32_t)-1; }
@ -98,6 +102,7 @@ public:
kOverlapElement,
kIllegalComponentOrder,
kConflictFit,
kConflictDataWidth,
};
struct PackedRegister {
@ -108,14 +113,15 @@ public:
DXIL::InterpolationMode Interp : 4;
uint8_t IndexFlags : 2;
uint8_t IndexingFixed : 1;
DXIL::SignatureDataWidth DataWidth; // width of each scalar type in bits (16 or 32 for now)
PackedRegister();
ConflictType DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width);
ConflictType DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width, DXIL::SignatureDataWidth dataWidth);
ConflictType DetectColConflict(uint8_t flags, unsigned col, unsigned width);
void PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width);
void PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width, DXIL::SignatureDataWidth dataWidth);
};
DxilSignatureAllocator(unsigned numRegisters);
DxilSignatureAllocator(unsigned numRegisters, bool useMinPrecision);
bool GetIgnoreIndexing() const { return m_bIgnoreIndexing; }
void SetIgnoreIndexing(bool ignoreIndexing) { m_bIgnoreIndexing = ignoreIndexing; }
@ -135,9 +141,12 @@ public:
// Pack in a prefix-stable way - appended elements do not affect positions of prior elements.
unsigned PackPrefixStable(std::vector<PackElement*> elements, unsigned startRow, unsigned numRows);
bool UseMinPrecision() const { return m_bUseMinPrecision; }
protected:
std::vector<PackedRegister> m_Registers;
bool m_bIgnoreIndexing;
bool m_bUseMinPrecision;
};

Просмотреть файл

@ -69,12 +69,14 @@ uint8_t DxilSignatureAllocator::GetConflictFlagsRight(uint8_t flags) {
return conflicts;
}
DxilSignatureAllocator::PackedRegister::PackedRegister() : Interp(DXIL::InterpolationMode::Undefined), IndexFlags(0), IndexingFixed(0) {
DxilSignatureAllocator::PackedRegister::PackedRegister()
: Interp(DXIL::InterpolationMode::Undefined), IndexFlags(0),
IndexingFixed(0), DataWidth(DXIL::SignatureDataWidth::Undefined) {
for (unsigned i = 0; i < 4; ++i)
Flags[i] = 0;
}
DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width) {
DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::DetectRowConflict(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned width, DXIL::SignatureDataWidth dataWidth) {
// indexing already present, and element incompatible with indexing
if (IndexFlags && (flags & kEFConflictsWithIndexed))
return kConflictsWithIndexed;
@ -85,6 +87,8 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::Det
return kConflictsWithIndexedTessFactor;
if (Interp != DXIL::InterpolationMode::Undefined && Interp != interp)
return kConflictsWithInterpolationMode;
if (DataWidth != DXIL::SignatureDataWidth::Undefined && DataWidth != dataWidth)
return kConflictDataWidth;
unsigned freeWidth = 0;
for (unsigned i = 0; i < 4; ++i) {
if ((Flags[i] & kEFOccupied) || (Flags[i] & flags))
@ -114,10 +118,13 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::PackedRegister::Det
return kNoConflict;
}
void DxilSignatureAllocator::PackedRegister::PlaceElement(uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp, unsigned col, unsigned width) {
void DxilSignatureAllocator::PackedRegister::PlaceElement(
uint8_t flags, uint8_t indexFlags, DXIL::InterpolationMode interp,
unsigned col, unsigned width, DXIL::SignatureDataWidth dataWidth) {
// Assume no conflicts (DetectRowConflict and DetectColConflict both return 0).
Interp = interp;
IndexFlags |= indexFlags;
DataWidth = dataWidth;
if ((flags & kEFConflictsWithIndexed) || (flags & kEFTessFactor)) {
DXASSERT(indexFlags == IndexFlags, "otherwise, bug in DetectRowConflict checking index flags");
IndexingFixed = 1;
@ -136,8 +143,8 @@ void DxilSignatureAllocator::PackedRegister::PlaceElement(uint8_t flags, uint8_t
}
}
DxilSignatureAllocator::DxilSignatureAllocator(unsigned numRegisters)
: m_bIgnoreIndexing(false) {
DxilSignatureAllocator::DxilSignatureAllocator(unsigned numRegisters, bool useMinPrecision)
: m_bIgnoreIndexing(false), m_bUseMinPrecision(useMinPrecision) {
m_Registers.resize(numRegisters);
}
@ -150,7 +157,7 @@ DxilSignatureAllocator::ConflictType DxilSignatureAllocator::DetectRowConflict(c
uint8_t flags = GetElementFlags(SE);
for (unsigned i = 0; i < rows; ++i) {
uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
ConflictType conflict = m_Registers[row + i].DetectRowConflict(flags, indexFlags, interp, cols);
ConflictType conflict = m_Registers[row + i].DetectRowConflict(flags, indexFlags, interp, cols, SE->GetDataBitWidth());
if (conflict)
return conflict;
}
@ -177,7 +184,7 @@ void DxilSignatureAllocator::PlaceElement(const PackElement *SE, unsigned row, u
uint8_t flags = GetElementFlags(SE);
for (unsigned i = 0; i < rows; ++i) {
uint8_t indexFlags = m_bIgnoreIndexing ? 0 : GetIndexFlags(i, rows);
m_Registers[row + i].PlaceElement(flags, indexFlags, interp, col, cols);
m_Registers[row + i].PlaceElement(flags, indexFlags, interp, col, cols, SE->GetDataBitWidth());
}
}
@ -328,7 +335,7 @@ unsigned DxilSignatureAllocator::PackOptimized(std::vector<PackElement*> element
// ==========
// Preallocate clip/cull elements
std::sort(clipcullElements.begin(), clipcullElements.end(), CmpElementsLess);
DxilSignatureAllocator clipcullAllocator(2);
DxilSignatureAllocator clipcullAllocator(2, m_bUseMinPrecision);
unsigned clipcullRegUsed = clipcullAllocator.PackGreedy(clipcullElements, 0, 2);
unsigned clipcullComponentsByRow[2] = {0, 0};
for (auto &SE : clipcullElements) {
@ -349,6 +356,7 @@ unsigned DxilSignatureAllocator::PackOptimized(std::vector<PackElement*> element
clipcullTempElements[row].kind = clipcullElementsByRow[row][0]->GetKind();
clipcullTempElements[row].interpolation = clipcullElementsByRow[row][0]->GetInterpolationMode();
clipcullTempElements[row].interpretation = clipcullElementsByRow[row][0]->GetInterpretation();
clipcullTempElements[row].dataBitWidth = clipcullElementsByRow[row][0]->GetDataBitWidth();
clipcullTempElements[row].rows = 1;
clipcullTempElements[row].cols = clipcullComponentsByRow[row];
}
@ -435,7 +443,7 @@ unsigned DxilSignatureAllocator::PackPrefixStable(std::vector<PackElement*> elem
// Special handling for prefix-stable clip/cull arguments
// - basically, do not pack with anything else to maximize chance to pack into two register limit
unsigned clipcullRegUsed = 0;
DxilSignatureAllocator clipcullAllocator(2);
DxilSignatureAllocator clipcullAllocator(2, m_bUseMinPrecision);
DummyElement clipcullTempElements[2];
for (auto &SE : elements) {
@ -458,6 +466,7 @@ unsigned DxilSignatureAllocator::PackPrefixStable(std::vector<PackElement*> elem
clipcullTempElements[used - 1].kind = SE->GetKind();
clipcullTempElements[used - 1].interpolation = SE->GetInterpolationMode();
clipcullTempElements[used - 1].interpretation = SE->GetInterpretation();
clipcullTempElements[used - 1].dataBitWidth = SE->GetDataBitWidth();
clipcullTempElements[used - 1].rows = 1;
clipcullTempElements[used - 1].cols = 4;
rowsUsed = std::max(rowsUsed, PackNext(&clipcullTempElements[used - 1], startRow, numRows));

Просмотреть файл

@ -110,13 +110,26 @@ protected:
class DxilPackElement : public DxilSignatureAllocator::PackElement {
DxilSignatureElement *m_pSE;
bool m_bUseMinPrecision;
public:
DxilPackElement(DxilSignatureElement *pSE) : m_pSE(pSE) {}
DxilPackElement(DxilSignatureElement *pSE, bool useMinPrecision) : m_pSE(pSE), m_bUseMinPrecision(useMinPrecision) {}
__override ~DxilPackElement() {}
__override uint32_t GetID() const { return m_pSE->GetID(); }
__override DXIL::SemanticKind GetKind() const { return m_pSE->GetKind(); }
__override DXIL::InterpolationMode GetInterpolationMode() const { return m_pSE->GetInterpolationMode()->GetKind(); }
__override DXIL::SemanticInterpretationKind GetInterpretation() const { return m_pSE->GetInterpretation(); }
// Reports the data width used when packing this element's scalar components.
// A 16-bit component type is reported as Bits16 only when min precision is
// not in use; under min precision it is reported as Bits32, as are 1-bit
// (bool) and 32-bit component types. Every other size maps to Undefined.
__override DXIL::SignatureDataWidth GetDataBitWidth() const {
  using Width = DXIL::SignatureDataWidth;
  switch (m_pSE->GetCompType().GetSizeInBits()) {
  case 16:
    if (m_bUseMinPrecision)
      return Width::Bits32;
    return Width::Bits16;
  case 1:
  case 32:
    return Width::Bits32;
  default:
    return Width::Undefined;
  }
}
__override uint32_t GetRows() const { return m_pSE->GetRows(); }
__override uint32_t GetCols() const { return m_pSE->GetCols(); }
__override bool IsAllocated() const { return m_pSE->IsAllocated(); }

Просмотреть файл

@ -148,6 +148,7 @@ enum class ValidationRule : unsigned {
MetaSemanticShouldBeAllocated, // Semantic should have a valid packing location
MetaSemanticShouldNotBeAllocated, // Semantic should have a packing location of -1
MetaSignatureCompType, // signature %0 specifies unrecognized or invalid component type
MetaSignatureDataWidth, // Data width must be identical for all elements packed into the same row.
MetaSignatureIllegalComponentOrder, // Component ordering for packed elements must be: arbitrary < system value < system generated value
MetaSignatureIndexConflict, // Only elements with compatible indexing rules may be packed together
MetaSignatureOutOfRange, // Signature elements must fit within maximum signature size

Просмотреть файл

@ -125,7 +125,10 @@ static bool CheckFit(ElementVec &elements) {
packElements.reserve(elements.size());
for (auto &E : elements)
packElements.push_back(&E);
DxilSignatureAllocator alloc(32);
// Since we are putting an upper limit of 4x32 registers regardless of actual element size,
// we can just let the allocator use the default behavior.
// This should be revisited if we enforce a loose upper limit on the total number of signature registers based on element size.
DxilSignatureAllocator alloc(32, true);
alloc.SetIgnoreIndexing(true);
alloc.PackOptimized(packElements, 0, 32);
for (auto &E : elements) {

Просмотреть файл

@ -42,6 +42,36 @@ CompType::Kind CompType::GetKind() const {
return m_Kind;
}
// Returns the size, in bits, associated with this component kind.
// Kind::Invalid yields 0. A kind not listed below also yields 0 after
// tripping DXASSERT.
uint8_t CompType::GetSizeInBits() const {
  uint8_t bits = 0;
  switch (m_Kind) {
  case Kind::I1:
    bits = 1;
    break;
  case Kind::I16:
  case Kind::U16:
  case Kind::F16:
  case Kind::SNormF16:
  case Kind::UNormF16:
    bits = 16;
    break;
  case Kind::I32:
  case Kind::U32:
  case Kind::F32:
  case Kind::SNormF32:
  case Kind::UNormF32:
    bits = 32;
    break;
  case Kind::I64:
  case Kind::U64:
  case Kind::F64:
  case Kind::SNormF64:
  case Kind::UNormF64:
    bits = 64;
    break;
  case Kind::Invalid:
    // Leave the size at zero.
    break;
  default:
    DXASSERT(false, "invalid type kind");
    break;
  }
  return bits;
}
CompType CompType::getInvalid() {
return CompType();
}

Просмотреть файл

@ -139,6 +139,7 @@ private:
const DxilSignature &m_signature;
DXIL::TessellatorDomain m_domain;
bool m_isInput;
bool m_useMinPrecision;
size_t m_fixedSize;
typedef std::pair<const char *, uint32_t> NameOffsetPair;
typedef llvm::SmallMapVector<const char *, uint32_t, 8> NameOffsetMap;
@ -192,7 +193,9 @@ private:
else
sig.AlwaysReads_Mask = 0;
sig.MinPrecision = CompTypeToSigMinPrecision(pElement->GetCompType());
sig.MinPrecision = m_useMinPrecision
? CompTypeToSigMinPrecision(pElement->GetCompType())
: DxilProgramSigMinPrecision::Default;
for (unsigned i = 0; i < eltCount; ++i) {
sig.SemanticIndex = indexVec[i];
@ -228,8 +231,8 @@ private:
public:
DxilProgramSignatureWriter(const DxilSignature &signature,
DXIL::TessellatorDomain domain, bool isInput)
: m_signature(signature), m_domain(domain), m_isInput(isInput) {
DXIL::TessellatorDomain domain, bool isInput, bool UseMinPrecision)
: m_signature(signature), m_domain(domain), m_isInput(isInput), m_useMinPrecision(UseMinPrecision) {
calcSizes();
}
@ -281,14 +284,18 @@ public:
DxilPartWriter *hlsl::NewProgramSignatureWriter(const DxilModule &M, DXIL::SignatureKind Kind) {
switch (Kind) {
case DXIL::SignatureKind::Input:
return new DxilProgramSignatureWriter(M.GetInputSignature(),
M.GetTessellatorDomain(), true);
return new DxilProgramSignatureWriter(
M.GetInputSignature(), M.GetTessellatorDomain(), true,
!M.m_ShaderFlags.GetUseNativeLowPrecision());
case DXIL::SignatureKind::Output:
return new DxilProgramSignatureWriter(M.GetOutputSignature(),
M.GetTessellatorDomain(), false);
return new DxilProgramSignatureWriter(
M.GetOutputSignature(), M.GetTessellatorDomain(), false,
!M.m_ShaderFlags.GetUseNativeLowPrecision());
case DXIL::SignatureKind::PatchConstant:
return new DxilProgramSignatureWriter(M.GetPatchConstantSignature(),
M.GetTessellatorDomain(), /*IsInput*/ M.GetShaderModel()->IsDS());
return new DxilProgramSignatureWriter(
M.GetPatchConstantSignature(), M.GetTessellatorDomain(),
/*IsInput*/ M.GetShaderModel()->IsDS(),
/*UseMinPrecision*/!M.m_ShaderFlags.GetUseNativeLowPrecision());
}
return nullptr;
}
@ -806,12 +813,14 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
if (ValMajor == 1 && ValMinor == 0)
Flags &= ~SerializeDxilFlags::IncludeDebugNamePart;
DxilProgramSignatureWriter inputSigWriter(pModule->GetInputSignature(),
pModule->GetTessellatorDomain(),
/*IsInput*/ true);
DxilProgramSignatureWriter outputSigWriter(pModule->GetOutputSignature(),
pModule->GetTessellatorDomain(),
/*IsInput*/ false);
DxilProgramSignatureWriter inputSigWriter(
pModule->GetInputSignature(), pModule->GetTessellatorDomain(),
/*IsInput*/ true,
/*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
DxilProgramSignatureWriter outputSigWriter(
pModule->GetOutputSignature(), pModule->GetTessellatorDomain(),
/*IsInput*/ false,
/*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
DxilPSVWriter PSVWriter(*pModule);
DxilContainerWriter_impl writer;
@ -831,8 +840,8 @@ void hlsl::SerializeDxilContainerForModule(DxilModule *pModule,
DxilProgramSignatureWriter patchConstantSigWriter(
pModule->GetPatchConstantSignature(), pModule->GetTessellatorDomain(),
/*IsInput*/ pModule->GetShaderModel()->IsDS());
/*IsInput*/ pModule->GetShaderModel()->IsDS(),
/*UseMinPrecision*/ !pModule->m_ShaderFlags.GetUseNativeLowPrecision());
if (pModule->GetPatchConstantSignature().GetElements().size()) {
writer.AddPart(DFCC_PatchConstantSignature, patchConstantSigWriter.size(),
[&](AbstractMemoryStream *pStream) {

Просмотреть файл

@ -223,7 +223,7 @@ public:
m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
std::unique_ptr<DxilEntrySignature> pSig =
llvm::make_unique<DxilEntrySignature>(SM->GetKind());
llvm::make_unique<DxilEntrySignature>(SM->GetKind(), M.GetHLModule().GetHLOptions().bUseMinPrecision);
// EntrySig for shader functions.
std::unordered_map<llvm::Function *, std::unique_ptr<DxilEntrySignature>>
DxilEntrySignatureMap;
@ -239,7 +239,7 @@ public:
if (m_pHLModule->HasDxilFunctionProps(&F)) {
DxilFunctionProps &props = m_pHLModule->GetDxilFunctionProps(&F);
std::unique_ptr<DxilEntrySignature> pSig =
llvm::make_unique<DxilEntrySignature>(props.shaderKind);
llvm::make_unique<DxilEntrySignature>(props.shaderKind, m_pHLModule->GetHLOptions().bUseMinPrecision);
HLSignatureLower sigLower(&F, *m_pHLModule, *pSig);
sigLower.Run();
DxilEntrySignatureMap[&F] = std::move(pSig);

Просмотреть файл

@ -141,7 +141,7 @@ void DxilModule::SetShaderModel(const ShaderModel *pSM) {
m_pSM->GetDxilVersion(m_DxilMajor, m_DxilMinor);
m_pMDHelper->SetShaderModel(m_pSM);
DXIL::ShaderKind shaderKind = pSM->GetKind();
m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind);
m_EntrySignature = llvm::make_unique<DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
m_RootSignature.reset(new RootSignatureHandle());
}
@ -1298,7 +1298,7 @@ void DxilModule::LoadDxilMetadata() {
DXIL::ShaderKind shaderKind = m_DxilFunctionPropsMap[F]->shaderKind;
std::unique_ptr<hlsl::DxilEntrySignature> Sig =
llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind);
llvm::make_unique<hlsl::DxilEntrySignature>(shaderKind, !m_ShaderFlags.GetUseNativeLowPrecision());
m_pMDHelper->LoadDxilSignatures(pSig->getOperand(idx), *Sig);

Просмотреть файл

@ -23,8 +23,12 @@ namespace hlsl {
//
// Signature methods.
//
DxilSignature::DxilSignature(DXIL::ShaderKind shaderKind, DXIL::SignatureKind sigKind)
: m_sigPointKind(SigPoint::GetKind(shaderKind, sigKind, /*isPatchConstantFunction*/false, /*isSpecialInput*/false)) {}
DxilSignature::DxilSignature(DXIL::ShaderKind shaderKind,
DXIL::SignatureKind sigKind, bool useMinPrecision)
: m_sigPointKind(SigPoint::GetKind(shaderKind, sigKind,
/*isPatchConstantFunction*/ false,
/*isSpecialInput*/ false)),
m_UseMinPrecision(useMinPrecision) {}
DxilSignature::DxilSignature(DXIL::SigPointKind sigPointKind)
: m_sigPointKind(sigPointKind) {}
@ -124,12 +128,15 @@ unsigned DxilSignature::PackElements(DXIL::PackingStrategy packing) {
std::vector<DxilPackElement> packElements;
for (auto &SE : m_Elements) {
if (ShouldBeAllocated(SE.get()))
packElements.emplace_back(SE.get());
packElements.emplace_back(SE.get(), m_UseMinPrecision);
}
if (m_sigPointKind == DXIL::SigPointKind::GSOut) {
// Special case due to support for multiple streams
DxilSignatureAllocator alloc[4] = {32, 32, 32, 32};
DxilSignatureAllocator alloc[4] = {{32, UseMinPrecision()},
{32, UseMinPrecision()},
{32, UseMinPrecision()},
{32, UseMinPrecision()}};
std::vector<DxilSignatureAllocator::PackElement*> elements[4];
for (auto &SE : packElements) {
elements[SE.Get()->GetOutputStream()].push_back(&SE);
@ -173,7 +180,7 @@ unsigned DxilSignature::PackElements(DXIL::PackingStrategy packing) {
case DXIL::PackingKind::Vertex:
case DXIL::PackingKind::PatchConstant: {
DxilSignatureAllocator alloc(32);
DxilSignatureAllocator alloc(32, UseMinPrecision());
std::vector<DxilSignatureAllocator::PackElement*> elements;
elements.reserve(packElements.size());
for (auto &SE : packElements){

Просмотреть файл

@ -85,6 +85,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
case hlsl::ValidationRule::MetaSignatureOutOfRange: return "signature element %0 at location (%1,%2) size (%3,%4) is out of range.";
case hlsl::ValidationRule::MetaSignatureIndexConflict: return "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.";
case hlsl::ValidationRule::MetaSignatureIllegalComponentOrder: return "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).";
case hlsl::ValidationRule::MetaSignatureDataWidth: return "signature element %0 at location (%1, %2) size (%3, %4) has data width that differs from another element packed into the same row.";
case hlsl::ValidationRule::MetaIntegerInterpMode: return "signature element %0 specifies invalid interpolation mode for integer component type.";
case hlsl::ValidationRule::MetaInterpModeInOneRow: return "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.";
case hlsl::ValidationRule::MetaSemanticCompType: return "%0 must be %1";
@ -3437,7 +3438,7 @@ static void ValidateSignatureOverlap(
break;
}
DxilPackElement PE(&E);
DxilPackElement PE(&E, allocator.UseMinPrecision());
DxilSignatureAllocator::ConflictType conflict = allocator.DetectRowConflict(&PE, E.GetStartRow());
if (conflict == DxilSignatureAllocator::kNoConflict || conflict == DxilSignatureAllocator::kInsufficientFreeComponents)
conflict = allocator.DetectColConflict(&PE, E.GetStartRow(), E.GetStartCol());
@ -3496,6 +3497,14 @@ static void ValidateSignatureOverlap(
std::to_string(E.GetRows()),
std::to_string(E.GetCols())});
break;
case DxilSignatureAllocator::kConflictDataWidth:
ValCtx.EmitFormatError(ValidationRule::MetaSignatureDataWidth,
{E.GetName(),
std::to_string(E.GetStartRow()),
std::to_string(E.GetStartCol()),
std::to_string(E.GetRows()),
std::to_string(E.GetCols())});
break;
default:
DXASSERT(false, "otherwise, unrecognized conflict type from DxilSignatureAllocator");
}
@ -3503,7 +3512,11 @@ static void ValidateSignatureOverlap(
static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
unsigned maxScalars) {
DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {32, 32, 32, 32};
DxilSignatureAllocator allocator[DXIL::kNumOutputStreams] = {
{32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
{32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
{32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()},
{32, !ValCtx.DxilMod.m_ShaderFlags.GetUseNativeLowPrecision()}};
unordered_set<Semantic::Kind> semanticUsageSet[DXIL::kNumOutputStreams];
StringMap<unordered_set<unsigned>> semanticIndexMap[DXIL::kNumOutputStreams];
unordered_set<unsigned> clipcullRowSet[DXIL::kNumOutputStreams];
@ -3516,6 +3529,7 @@ static void ValidateSignature(ValidationContext &ValCtx, const DxilSignature &S,
const InterpolationMode *prevBaryInterpMode = nullptr;
unsigned numBarycentrics = 0;
for (auto &E : S.GetElements()) {
DXIL::SemanticKind semanticKind = E->GetSemantic()->GetKind();
ValidateSignatureElement(*E, ValCtx);

Просмотреть файл

@ -0,0 +1,15 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// CHECK: {{![0-9]+}} = !{i32 0, !"A", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 1, !"B", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 2, null}
// CHECK: {{![0-9]+}} = !{i32 2, !"C", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 3, i32 1, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 3, !"D", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 2, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 4, !"E", i8 4, i8 0, {{![0-9]+}}, i8 1, i32 1, i8 1, i32 3, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 5, !"F", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 2, i8 2, null}
// CHECK: {{![0-9]+}} = !{i32 6, !"G", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 1, i32 1, i8 3, null}
// Pixel shader entry point for the FileCheck patterns above. The body is
// trivial; what matters is the input list, which mixes min16float, float,
// half and int parameters (semantics A through G) so that the emitted
// signature metadata for each one can be matched.
float4 main(min16float2 a : A, float2 b : B, half3 c : C,
float2 d : D, int e : E, half2 f : F, half g : G) : SV_Target {
return 1;
}

Просмотреть файл

@ -0,0 +1,16 @@
// RUN: %dxc -E main -T ps_6_0 -no-min-precision %s | FileCheck %s
// TODO: Update this file when we introduce i8/i16.
// CHECK: {{![0-9]+}} = !{i32 0, !"A", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 1, !"B", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 1, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 2, !"C", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 3, i32 2, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 3, !"D", i8 9, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 1, i8 2, null}
// CHECK: {{![0-9]+}} = !{i32 4, !"E", i8 4, i8 0, {{![0-9]+}}, i8 1, i32 1, i8 1, i32 3, i8 0, null}
// CHECK: {{![0-9]+}} = !{i32 5, !"F", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 2, i32 0, i8 2, null}
// CHECK: {{![0-9]+}} = !{i32 6, !"G", i8 8, i8 0, {{![0-9]+}}, i8 2, i32 1, i8 1, i32 2, i8 3, null}
// Pixel shader entry point for the FileCheck patterns above, compiled with
// -no-min-precision per the run line. The body is trivial; what matters is
// the input list, which mixes min16float, float, half and int parameters
// (semantics A through G) so that the emitted signature metadata for each
// one can be matched.
float4 main(min16float2 a : A, float2 b : B, half3 c : C,
float2 d : D, int e : E, half2 f : F, half g : G) : SV_Target {
return 1;
}

Просмотреть файл

@ -692,6 +692,8 @@ public:
TEST_METHOD(CodeGenSelectObj5)
TEST_METHOD(CodeGenSelfCopy)
TEST_METHOD(CodeGenSelMat)
TEST_METHOD(CodeGenSignaturePacking)
TEST_METHOD(CodeGenSignaturePackingByWidth)
TEST_METHOD(CodeGenShaderAttr)
TEST_METHOD(CodeGenShare_Mem_Dbg)
TEST_METHOD(CodeGenShare_Mem_Phi)
@ -3922,6 +3924,14 @@ TEST_F(CompilerTest, CodeGenSelMat) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\selMat.hlsl");
}
// Runs the CodeGenTestCheck harness on signature_packing.hlsl.
TEST_F(CompilerTest, CodeGenSignaturePacking) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\signature_packing.hlsl");
}
// Runs the CodeGenTestCheck harness on signature_packing_by_width.hlsl.
TEST_F(CompilerTest, CodeGenSignaturePackingByWidth) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\signature_packing_by_width.hlsl");
}
// Runs the CodeGenTestCheck harness on shader_attr.hlsl.
TEST_F(CompilerTest, CodeGenShaderAttr) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\shader_attr.hlsl");
}

Просмотреть файл

@ -146,6 +146,7 @@ public:
TEST_METHOD(OutputControlPointIDInPatchConstantFunction);
TEST_METHOD(GsVertexIDOutOfBound)
TEST_METHOD(StreamIDOutOfBound)
TEST_METHOD(SignatureDataWidth)
TEST_METHOD(SignatureStreamIDForNonGS)
TEST_METHOD(TypedUAVStoreFullMask0)
TEST_METHOD(TypedUAVStoreFullMask1)
@ -359,42 +360,48 @@ public:
}
void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
IDxcBlob **pResultBlob) {
LPCWSTR *pArguments, UINT32 argCount, const DxcDefine *pDefines,
UINT32 defineCount, IDxcBlob **pResultBlob) {
CComPtr<IDxcCompiler> pCompiler;
CComPtr<IDxcOperationResult> pResult;
CComPtr<IDxcBlob> pProgram;
CA2W shWide(pShaderModel, CP_UTF8);
VERIFY_SUCCEEDED(
m_dllSupport.CreateInstance(CLSID_DxcCompiler, &pCompiler));
VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main",
shWide, nullptr, 0, nullptr, 0, nullptr,
&pResult));
VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", shWide,
pArguments, argCount, pDefines,
defineCount, nullptr, &pResult));
CheckOperationResultMsgs(pResult, nullptr, false, false);
VERIFY_SUCCEEDED(pResult->GetResult(pResultBlob));
}
// Convenience overload: compiles pSource for pShaderModel by forwarding to
// the full CompileSource overload with no extra arguments and no defines
// (nullptr/0 for both pairs). The compiled result is returned through
// pResultBlob.
void CompileSource(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
IDxcBlob **pResultBlob) {
CompileSource(pSource, pShaderModel, nullptr, 0, nullptr, 0, pResultBlob);
}
void CompileSource(LPCSTR pSource, LPCSTR pShaderModel,
IDxcBlob **pResultBlob) {
CComPtr<IDxcBlobEncoding> pSourceBlob;
Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
CompileSource(pSourceBlob, pShaderModel, pResultBlob);
CompileSource(pSourceBlob, pShaderModel, nullptr, 0, nullptr, 0, pResultBlob);
}
// Stores in *text the string produced by the free function
// ::DisassembleProgram for pProgram, using this fixture's m_dllSupport.
void DisassembleProgram(IDxcBlob *pProgram, std::string *text) {
*text = ::DisassembleProgram(m_dllSupport, pProgram);
}
void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
void RewriteAssemblyCheckMsg(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
LPCWSTR *pArguments, UINT32 argCount,
const DxcDefine *pDefines, UINT32 defineCount,
llvm::ArrayRef<LPCSTR> pLookFors,
llvm::ArrayRef<LPCSTR> pReplacements,
llvm::ArrayRef<LPCSTR> pErrorMsgs,
bool bRegex = false) {
CComPtr<IDxcBlob> pText;
CComPtr<IDxcBlobEncoding> pSourceBlob;
Utf8ToBlob(m_dllSupport, pSource, &pSourceBlob);
RewriteAssemblyToText(pSourceBlob, pShaderModel, pLookFors, pReplacements, &pText, bRegex);
RewriteAssemblyToText(pSource, pShaderModel, pArguments, argCount, pDefines, defineCount, pLookFors, pReplacements, &pText, bRegex);
CComPtr<IDxcAssembler> pAssembler;
CComPtr<IDxcOperationResult> pAssembleResult;
VERIFY_SUCCEEDED(
@ -409,12 +416,62 @@ public:
}
}
// String-source overload: wraps the UTF-8 text in pSource in a blob via
// Utf8ToBlob and hands it, together with every other argument unchanged, to
// the blob-based RewriteAssemblyCheckMsg overload.
void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
                             LPCWSTR *pArguments, UINT32 argCount,
                             const DxcDefine *pDefines, UINT32 defineCount,
                             llvm::ArrayRef<LPCSTR> pLookFors,
                             llvm::ArrayRef<LPCSTR> pReplacements,
                             llvm::ArrayRef<LPCSTR> pErrorMsgs,
                             bool bRegex = false) {
  CComPtr<IDxcBlobEncoding> pUtf8Blob;
  Utf8ToBlob(m_dllSupport, pSource, &pUtf8Blob);
  RewriteAssemblyCheckMsg(pUtf8Blob, pShaderModel,
                          pArguments, argCount,
                          pDefines, defineCount,
                          pLookFors, pReplacements, pErrorMsgs,
                          bRegex);
}
// Short form of the string-source overload: forwards with a null argument
// list and a null define list, both with a count of zero.
void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
                             llvm::ArrayRef<LPCSTR> pLookFors,
                             llvm::ArrayRef<LPCSTR> pReplacements,
                             llvm::ArrayRef<LPCSTR> pErrorMsgs,
                             bool bRegex = false) {
  LPCWSTR *const pNoArguments = nullptr;
  const DxcDefine *const pNoDefines = nullptr;
  RewriteAssemblyCheckMsg(pSource, pShaderModel,
                          pNoArguments, 0,
                          pNoDefines, 0,
                          pLookFors, pReplacements, pErrorMsgs, bRegex);
}
// File-based overload: resolves `name` with hlsl_test::GetPathToHlslDataFile,
// loads that file into a blob through IDxcLibrary::CreateBlobFromFile, and
// forwards the blob plus all remaining parameters to the IDxcBlobEncoding*
// overload.
void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                             LPCWSTR *pArguments, UINT32 argCount,
                             const DxcDefine *pDefines, UINT32 defCount,
                             llvm::ArrayRef<LPCSTR> pLookFors,
                             llvm::ArrayRef<LPCSTR> pReplacements,
                             llvm::ArrayRef<LPCSTR> pErrorMsgs,
                             bool bRegex = false) {
  std::wstring dataFilePath = hlsl_test::GetPathToHlslDataFile(name);

  CComPtr<IDxcLibrary> pDxcLibrary;
  CComPtr<IDxcBlobEncoding> pFileBlob;
  VERIFY_SUCCEEDED(
      m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pDxcLibrary));
  VERIFY_SUCCEEDED(pDxcLibrary->CreateBlobFromFile(dataFilePath.c_str(),
                                                   nullptr, &pFileBlob));

  RewriteAssemblyCheckMsg(pFileBlob, pShaderModel,
                          pArguments, argCount,
                          pDefines, defCount,
                          pLookFors, pReplacements, pErrorMsgs, bRegex);
}
// Short form of the file-based overload: forwards with a null argument list
// and a null define list, both with a count of zero.
void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                             llvm::ArrayRef<LPCSTR> pLookFors,
                             llvm::ArrayRef<LPCSTR> pReplacements,
                             llvm::ArrayRef<LPCSTR> pErrorMsgs,
                             bool bRegex = false) {
  LPCWSTR *const pNoArguments = nullptr;
  const DxcDefine *const pNoDefines = nullptr;
  RewriteAssemblyCheckMsg(name, pShaderModel,
                          pNoArguments, 0,
                          pNoDefines, 0,
                          pLookFors, pReplacements, pErrorMsgs, bRegex);
}
void RewriteAssemblyToText(IDxcBlobEncoding *pSource, LPCSTR pShaderModel,
llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
LPCWSTR *pArguments, UINT32 argCount,
const DxcDefine *pDefines, UINT32 defineCount,
llvm::ArrayRef<LPCSTR> pLookFors,
llvm::ArrayRef<LPCSTR> pReplacements,
IDxcBlob **pBlob, bool bRegex = false) {
CComPtr<IDxcBlob> pProgram;
std::string disassembly;
CompileSource(pSource, pShaderModel, &pProgram);
CompileSource(pSource, pShaderModel, pArguments, argCount, pDefines, defineCount, &pProgram);
DisassembleProgram(pProgram, &disassembly);
for (unsigned i = 0; i < pLookFors.size(); ++i) {
LPCSTR pLookFor = pLookFors[i];
@ -456,33 +513,7 @@ public:
}
Utf8ToBlob(m_dllSupport, disassembly.c_str(), pBlob);
}
// File-based check with the full pipeline inlined: loads the HLSL data file
// named by `name`, produces rewritten disassembly text with
// RewriteAssemblyToText, assembles that text to a container, and checks the
// assembler result against pErrorMsgs. When that check returns false, the
// assembled blob is additionally run through CheckValidationMsgs.
void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                             llvm::ArrayRef<LPCSTR> pLookFors,
                             llvm::ArrayRef<LPCSTR> pReplacements,
                             llvm::ArrayRef<LPCSTR> pErrorMsgs,
                             bool bRegex = false) {
  std::wstring dataFilePath = hlsl_test::GetPathToHlslDataFile(name);

  CComPtr<IDxcLibrary> pDxcLibrary;
  CComPtr<IDxcBlobEncoding> pFileBlob;
  VERIFY_SUCCEEDED(
      m_dllSupport.CreateInstance(CLSID_DxcLibrary, &pDxcLibrary));
  VERIFY_SUCCEEDED(pDxcLibrary->CreateBlobFromFile(dataFilePath.c_str(),
                                                   nullptr, &pFileBlob));

  CComPtr<IDxcBlob> pRewrittenText;
  RewriteAssemblyToText(pFileBlob, pShaderModel, pLookFors, pReplacements,
                        &pRewrittenText, bRegex);

  CComPtr<IDxcAssembler> pDxcAssembler;
  CComPtr<IDxcOperationResult> pAssemblyOutcome;
  VERIFY_SUCCEEDED(
      m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pDxcAssembler));
  VERIFY_SUCCEEDED(
      pDxcAssembler->AssembleToContainer(pRewrittenText, &pAssemblyOutcome));

  // Nothing more to do when the assembler result was already handled here.
  if (CheckOperationResultMsgs(pAssemblyOutcome, pErrorMsgs, true, bRegex)) {
    return;
  }

  // Assembly succeeded, try validation.
  CComPtr<IDxcBlob> pContainer;
  VERIFY_SUCCEEDED(pAssemblyOutcome->GetResult(&pContainer));
  CheckValidationMsgs(pContainer, pErrorMsgs, bRegex);
}
// compile one or two sources, validate module from 1 with container parts from 2, check messages
void ReplaceContainerPartsCheckMsgs(LPCSTR pSource1, LPCSTR pSource2, LPCSTR pShaderModel,
@ -1069,6 +1100,18 @@ TEST_F(ValidationTest, StreamIDOutOfBound) {
"expect StreamID between 0 , got 1");
}
// Compiles signature_packing_by_width.hlsl for ps_6_0 with -no-min-precision,
// rewrites the matched disassembly pattern (leading `i8 8` becomes `i8 9`,
// remaining fields kept), and expects the "data width that differs from
// another element packed into the same row" message for element F.
// Patterns are passed with bRegex = true, so parentheses in the expected
// message are escaped.
TEST_F(ValidationTest, SignatureDataWidth) {
  std::vector<LPCWSTR> pArguments = { L"-no-min-precision" };
  // Derive the count from the container so it cannot drift from its contents.
  const UINT32 argCount = static_cast<UINT32>(pArguments.size());
  RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\signature_packing_by_width.hlsl", "ps_6_0",
      pArguments.data(), argCount, nullptr, 0,
      {"i8 8, i8 0, (![0-9]+), i8 2, i32 1, i8 2, i32 0, i8 0, null}"},
      {"i8 9, i8 0, \\1, i8 2, i32 1, i8 2, i32 0, i8 0, null}"},
      "signature element F at location \\(0, 2\\) size \\(1, 2\\) has data "
      "width that differs from another element packed into the same row.",
      true);
}
TEST_F(ValidationTest, SignatureStreamIDForNonGS) {
RewriteAssemblyCheckMsg(
L"..\\CodeGenHLSL\\abs1.hlsl", "ps_6_0",

Просмотреть файл

@ -1563,6 +1563,7 @@ class db_dxil(object):
self.add_valrule_msg("Meta.SignatureOutOfRange", "Signature elements must fit within maximum signature size", "signature element %0 at location (%1,%2) size (%3,%4) is out of range.")
self.add_valrule_msg("Meta.SignatureIndexConflict", "Only elements with compatible indexing rules may be packed together", "signature element %0 at location (%1,%2) size (%3,%4) has an indexing conflict with another signature element packed into the same row.")
self.add_valrule_msg("Meta.SignatureIllegalComponentOrder", "Component ordering for packed elements must be: arbitrary < system value < system generated value", "signature element %0 at location (%1,%2) size (%3,%4) violates component ordering rule (arb < sv < sgv).")
self.add_valrule_msg("Meta.SignatureDataWidth", "Data width must be identical for all elements packed into the same row.", "signature element %0 at location (%1, %2) size (%3, %4) has data width that differs from another element packed into the same row.")
self.add_valrule_msg("Meta.IntegerInterpMode", "Interpolation mode on integer must be Constant", "signature element %0 specifies invalid interpolation mode for integer component type.")
self.add_valrule_msg("Meta.InterpModeInOneRow", "Interpolation mode must be identical for all elements packed into the same row.", "signature element %0 at location (%1,%2) size (%3,%4) has interpolation mode that differs from another element packed into the same row.")
self.add_valrule("Meta.SemanticCompType", "%0 must be %1")