Fix EvaluateAttribute Intrinsics (#275)

Currently the compiler crashes when a matrix or vector whose dimension is reduced from the original signature element is passed to the EvaluateAttribute functions. This change resolves the issue by replacing the allocas before these function calls are translated from DXIR to DXIL, so that the LoadInput calls to replace can be found correctly.
This commit is contained in:
Young Kim 2017-05-05 15:10:46 -07:00 коммит произвёл GitHub
Родитель da5f98bb95
Коммит 47d21ad6c0
6 изменённых файлов: 189 добавлений и 23 удалений

Просмотреть файл

@ -256,7 +256,7 @@ public:
// Load up debug information, to cross-reference values and the instructions // Load up debug information, to cross-reference values and the instructions
// used to load them. // used to load them.
m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0; m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
LegalizeEvalOperations(M);
if (!SM->IsCS()) { if (!SM->IsCS()) {
CreateDxilSignatures(); CreateDxilSignatures();
@ -381,6 +381,10 @@ private:
// For validation // For validation
std::unordered_map<unsigned, std::unordered_set<unsigned> > m_InputSemanticsUsed, std::unordered_map<unsigned, std::unordered_set<unsigned> > m_InputSemanticsUsed,
m_OutputSemanticsUsed[4], m_PatchConstantSemanticsUsed, m_OtherSemanticsUsed; m_OutputSemanticsUsed[4], m_PatchConstantSemanticsUsed, m_OtherSemanticsUsed;
// For EvaluateAttribute operations.
void LegalizeEvalOperations(Module &M);
void FindAllocasForEvalOperations(Value *f, std::unordered_set<AllocaInst*> &allocas);
}; };
class SimplifyInst : public FunctionPass { class SimplifyInst : public FunctionPass {
@ -2704,6 +2708,81 @@ void DxilGenerationPass::UpdateStructTypeForLegacyLayout() {
UpdateStructTypeForLegacyLayoutOnHLM(*m_pHLModule); UpdateStructTypeForLegacyLayoutOnHLM(*m_pHLModule);
} }
// Collects every alloca that feeds the value `val` of an EvaluateAttribute
// call. The walk goes backwards through calls, insertelement, shufflevector,
// extractelement and load instructions; any other kind of value ends the walk
// without recording anything.
void DxilGenerationPass::FindAllocasForEvalOperations(
    Value *val, std::unordered_set<AllocaInst *> &allocas) {
  Value *Cursor = val;
  for (;;) {
    // Reached the storage itself: record it and stop.
    if (AllocaInst *Root = dyn_cast<AllocaInst>(Cursor)) {
      allocas.insert(Root);
      return;
    }
    if (CallInst *Call = dyn_cast<CallInst>(Cursor)) {
      // Continue through the operand at HLOperandIndex::kUnaryOpSrc0Idx.
      Cursor = Call->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
      continue;
    }
    if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(Cursor)) {
      // Operand 0 could be another insertelement or undef; explore it
      // recursively, then keep walking through the inserted element.
      FindAllocasForEvalOperations(Insert->getOperand(0), allocas);
      Cursor = Insert->getOperand(1);
      continue;
    }
    if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Cursor)) {
      // The two shuffle sources could come from different allocas, so both
      // are followed: the first recursively, the second by this loop.
      FindAllocasForEvalOperations(Shuffle->getOperand(0), allocas);
      Cursor = Shuffle->getOperand(1);
      continue;
    }
    if (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(Cursor)) {
      Cursor = Extract->getOperand(0);
      continue;
    }
    if (LoadInst *Load = dyn_cast<LoadInst>(Cursor)) {
      Cursor = Load->getOperand(0);
      continue;
    }
    // Anything else cannot be traced further.
    return;
  }
}
// Prepares EvaluateAttribute calls for translation. The translation stage
// traces the call argument back to LoadInput operations; promoting the
// loads/stores of the allocas involved beforehand makes that trace
// straightforward.
void DxilGenerationPass::LegalizeEvalOperations(Module &M) {
  for (Function &F : M.getFunctionList()) {
    if (hlsl::GetHLOpcodeGroup(&F) == HLOpcodeGroup::NotHL)
      continue;

    // Gather every call of this function whose opcode is one of the
    // EvaluateAttribute intrinsics.
    std::vector<CallInst *> EvalCalls;
    for (User *U : F.users()) {
      CallInst *Call = dyn_cast<CallInst>(U);
      if (!Call)
        continue;
      switch (static_cast<IntrinsicOp>(hlsl::GetHLOpcode(Call))) {
      case IntrinsicOp::IOP_EvaluateAttributeAtSample:
      case IntrinsicOp::IOP_EvaluateAttributeCentroid:
      case IntrinsicOp::IOP_EvaluateAttributeSnapped:
        EvalCalls.push_back(Call);
        break;
      default:
        break;
      }
    }
    if (EvalCalls.empty())
      continue;

    // Starting at each call, collect the allocas its argument is built from.
    std::unordered_set<AllocaInst *> Roots;
    for (CallInst *Call : EvalCalls)
      FindAllocasForEvalOperations(Call, Roots);

    // Promote the direct loads and stores of each alloca, one alloca at a
    // time, reusing a single SSAUpdater.
    SSAUpdater Updater;
    SmallVector<Instruction *, 4> MemOps;
    for (AllocaInst *Root : Roots) {
      for (User *RootUser : Root->users()) {
        if (isa<LoadInst>(RootUser) || isa<StoreInst>(RootUser))
          MemOps.emplace_back(cast<Instruction>(RootUser));
      }
      LoadAndStorePromoter(MemOps, Updater).run(MemOps);
      MemOps.clear();
    }
  }
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
namespace { namespace {

Просмотреть файл

@ -604,50 +604,90 @@ Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
} }
CallInst *ValidateLoadInput(Value *V) { bool IsValidLoadInput(Value *V) {
// Must be load input. // Must be load input.
// TODO: report this error on front-end
if (!isa<CallInst>(V)) {
V->getContext().emitError("attribute evaluation can only be done on values "
"taken directly from inputs");
return false;
}
CallInst *CI = cast<CallInst>(V); CallInst *CI = cast<CallInst>(V);
// Must be immediate. // Must be immediate.
ConstantInt *opArg = ConstantInt *opArg =
cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx)); cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue()); DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
// TODO: report error on front-end. if (op != DXIL::OpCode::LoadInput) {
// "attribute evaluation can only be done on values taken directly from inputs" V->getContext().emitError("attribute evaluation can only be done on values "
DXASSERT_LOCALVAR(op, op == DXIL::OpCode::LoadInput, "must be load input"); "taken directly from inputs");
return CI; return false;
}
return true;
}
// Composes two shufflevector masks into a single mask by applying the current
// mask on top of the previous one: element i of the result is
// prevMask[curMask[i]].
// For example, if the previous mask is (12,11,10,13) and the current mask is
// (3,1,0,2), the new mask is (13,11,12,10).
// Returns prevMask unchanged when curMask is null.
// NOTE(review): both element lookups go through cast<ConstantInt>, so this
// presumably expects fully constant masks whose indices are in range for
// prevMask -- confirm how undef mask lanes should be handled.
Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
if (curMask == nullptr) {
return prevMask;
}
// The result has as many elements as curMask.
unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
SmallVector<uint32_t, 16> Elts;
for (unsigned i = 0; i != size; ++i) {
// curMask[i] selects which element of prevMask lands in slot i.
ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
ConstantInt *IVal =
cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
Elts.emplace_back(IVal->getSExtValue());
}
return ConstantDataVector::get(curMask->getContext(), Elts);
} }
Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) { Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
Constant *shufMask = nullptr; Constant *shufMask = nullptr;
if (V->getType()->isVectorTy()) { if (V->getType()->isVectorTy()) {
// Must be insert element inst. // Must be insert element inst. Keeping track of masks for shuffle vector
Value *Vec = V; Value *Vec = V;
if (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) { while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
Value *src0 = shuf->getOperand(0); shufMask = AccumulateMask(shufMask, shuf->getMask());
Value *src1 = shuf->getOperand(1); Vec = shuf->getOperand(0);
DXASSERT_LOCALVAR(src1, isa<UndefValue>(src1), "must be undef value");
shufMask = shuf->getMask();
Vec = src0;
} }
// TODO: We are assuming that the operand of insertelement is a LoadInput.
// This will fail on the case where we pass in matrix member using array subscript.
while (!isa<UndefValue>(Vec)) { while (!isa<UndefValue>(Vec)) {
InsertElementInst *insertInst = cast<InsertElementInst>(Vec); InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
Vec = insertInst->getOperand(0); Vec = insertInst->getOperand(0);
Value *Elt = insertInst->getOperand(1); Value *Elt = insertInst->getOperand(1);
CallInst *CI = ValidateLoadInput(Elt); if (IsValidLoadInput(Elt)) {
loadList.emplace_back(CI); loadList.emplace_back(cast<CallInst>(Elt));
}
} }
} else { } else {
CallInst *CI = ValidateLoadInput(V); if (IsValidLoadInput(V)) {
loadList.emplace_back(CI); loadList.emplace_back(cast<CallInst>(V));
}
} }
return shufMask; return shufMask;
} }
// Picks the vector type used for the temporary insertelement chain built
// while lowering an EvaluateAttribute call. A swizzle may have reduced the
// dimension of `src`, but the temporaries should keep the size of the
// original LoadInput, so the type is derived from how `src` was produced:
//   - insertelement: the type of `src` itself;
//   - shufflevector: the type of the shuffle's first operand.
// For any other kind of value an error is emitted on the context and nullptr
// is returned.
Type *GetInsertElementTypeForEvaluate(Value *src) {
  if (isa<InsertElementInst>(src))
    return src->getType();

  if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(src))
    return Shuffle->getOperand(0)->getType();

  src->getContext().emitError("Invalid type call for EvaluateAttribute function");
  return nullptr;
}
Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP; hlsl::OP *hlslOP = &helper.hlslOP;
Type *Ty = CI->getType();
Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
IRBuilder<> Builder(CI); IRBuilder<> Builder(CI);
@ -659,6 +699,7 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
OP::OpCode opcode = OP::OpCode::EvalSampleIndex; OP::OpCode opcode = OP::OpCode::EvalSampleIndex;
Value *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
Type *Ty = GetInsertElementTypeForEvaluate(val);
Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
@ -679,7 +720,6 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP; hlsl::OP *hlslOP = &helper.hlslOP;
Type *Ty = CI->getType();
Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx); Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx); Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
IRBuilder<> Builder(CI); IRBuilder<> Builder(CI);
@ -693,7 +733,7 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
OP::OpCode opcode = OP::OpCode::EvalSnapped; OP::OpCode opcode = OP::OpCode::EvalSnapped;
Value *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
Type *Ty = GetInsertElementTypeForEvaluate(val);
Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
Value *result = UndefValue::get(Ty); Value *result = UndefValue::get(Ty);
@ -710,13 +750,13 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
return result; return result;
} }
Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op, Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP; hlsl::OP *hlslOP = &helper.hlslOP;
Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx); Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
std::vector<CallInst*> loadList; std::vector<CallInst*> loadList;
Constant *shufMask = GetLoadInputsForEvaluate(src, loadList); Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
Type *Ty = src->getType();
unsigned size = loadList.size(); unsigned size = loadList.size();
@ -726,6 +766,7 @@ Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
Value *opArg = hlslOP->GetU32Const((unsigned)opcode); Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
Type *Ty = GetInsertElementTypeForEvaluate(src);
Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType()); Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
Value *result = UndefValue::get(Ty); Value *result = UndefValue::get(Ty);

Просмотреть файл

@ -0,0 +1,11 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// CHECK: attribute evaluation can only be done on values taken directly from inputs
// Negative test: `color` is a file-scope global rather than a parameter of
// main, and the test expects the diagnostic quoted above.
float4 color;
float4 main(float4 a : A) : SV_Target
{
float4 r = EvaluateAttributeCentroid(color);
return r;
}

Просмотреть файл

@ -0,0 +1,10 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// CHECK: evalCentroid
// Passes the whole float4x4 input `a` to EvaluateAttributeCentroid and then
// indexes the result with [0]; the test expects evalCentroid in the output.
float4 main(float4x4 a : A) : SV_Target
{
float4 r = EvaluateAttributeCentroid(a)[0];
return r;
}

Просмотреть файл

@ -0,0 +1,10 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// CHECK: evalCentroid
// Passes a four-element matrix swizzle (_12_31_23_41) of the float4x4 input
// `a` to EvaluateAttributeCentroid; the test expects evalCentroid in the
// output.
float4 main(float4x4 a : A) : SV_Target
{
float4 r = EvaluateAttributeCentroid(a._12_31_23_41);
return r;
}

Просмотреть файл

@ -420,6 +420,9 @@ public:
TEST_METHOD(CodeGenEmptyStruct) TEST_METHOD(CodeGenEmptyStruct)
TEST_METHOD(CodeGenEarlyDepthStencil) TEST_METHOD(CodeGenEarlyDepthStencil)
TEST_METHOD(CodeGenEval) TEST_METHOD(CodeGenEval)
TEST_METHOD(CodeGenEvalInvalid)
TEST_METHOD(CodeGenEvalMat)
TEST_METHOD(CodeGenEvalMatMember)
TEST_METHOD(CodeGenEvalPos) TEST_METHOD(CodeGenEvalPos)
TEST_METHOD(CodeGenExternRes) TEST_METHOD(CodeGenExternRes)
TEST_METHOD(CodeGenFloatCast) TEST_METHOD(CodeGenFloatCast)
@ -2410,6 +2413,18 @@ TEST_F(CompilerTest, CodeGenEval) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\eval.hlsl"); CodeGenTestCheck(L"..\\CodeGenHLSL\\eval.hlsl");
} }
TEST_F(CompilerTest, CodeGenEvalInvalid) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\evalInvalid.hlsl");
}
TEST_F(CompilerTest, CodeGenEvalMat) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMat.hlsl");
}
TEST_F(CompilerTest, CodeGenEvalMatMember) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMatMember.hlsl");
}
TEST_F(CompilerTest, CodeGenEvalPos) { TEST_F(CompilerTest, CodeGenEvalPos) {
CodeGenTestCheck(L"..\\CodeGenHLSL\\evalPos.hlsl"); CodeGenTestCheck(L"..\\CodeGenHLSL\\evalPos.hlsl");
} }