Fix EvaluateAttribute Intrinsics (#275)

Currently the compiler crashes when a matrix or vector with reduced dimensions relative to the original signature element is passed to the EvaluateAttribute functions. This change resolves the issue by promoting the loads and stores of the relevant allocas before these function calls are translated from DXIR to DXIL, so that the LoadInput calls to replace can be found correctly.
2017-05-05 15:10:46 -07:00 · 2017-05-05 15:10:46 -07:00 · 47d21ad6c0
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@ -256,7 +256,7 @@ public:
    // Load up debug information, to cross-reference values and the instructions
    // used to load them.
    m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
-
+    LegalizeEvalOperations(M);
    if (!SM->IsCS()) {
      CreateDxilSignatures();
@ -381,6 +381,10 @@ private:
  // For validation
  std::unordered_map<unsigned, std::unordered_set<unsigned> > m_InputSemanticsUsed,
    m_OutputSemanticsUsed[4], m_PatchConstantSemanticsUsed, m_OtherSemanticsUsed;
  // For EvaluateAttribute operations.
  // LegalizeEvalOperations scans the module for EvaluateAttribute* calls and
  // promotes the loads/stores of the allocas those calls read from.
  void LegalizeEvalOperations(Module &M);
  // Walks backwards from `f` through the instructions that define it and
  // records every alloca it reaches in `allocas`.
  void FindAllocasForEvalOperations(Value *f, std::unordered_set<AllocaInst*> &allocas);
 };
 class SimplifyInst : public FunctionPass {
@ -2704,6 +2708,81 @@ void DxilGenerationPass::UpdateStructTypeForLegacyLayout() {
  UpdateStructTypeForLegacyLayoutOnHLM(*m_pHLModule);
 }
 // Walks the chain of definitions feeding an EvaluateAttribute call and
 // records every alloca that chain ends at.
 //
 //   val     - value to start from (initially the call itself).
 //   allocas - receives each AllocaInst that is reached; existing entries
 //             are left untouched.
 //
 // Calls, extractelements and loads are followed through a single operand.
 // Insertelement and shufflevector may combine values that come from
 // different allocas, so operand 0 is explored recursively while the walk
 // itself continues along operand 1. Any other kind of value ends the walk
 // without recording anything.
 void DxilGenerationPass::FindAllocasForEvalOperations(
    Value *val, std::unordered_set<AllocaInst *> &allocas) {
  Value *Cursor = val;
  for (;;) {
    if (AllocaInst *Slot = dyn_cast<AllocaInst>(Cursor)) {
      allocas.insert(Slot);
      return;
    }
    if (CallInst *Call = dyn_cast<CallInst>(Cursor)) {
      Cursor = Call->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
      continue;
    }
    if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(Cursor)) {
      // Operand 0 could be another insertelement or undef.
      FindAllocasForEvalOperations(Insert->getOperand(0), allocas);
      Cursor = Insert->getOperand(1);
      continue;
    }
    if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Cursor)) {
      // The two shuffle sources could come from different allocas.
      FindAllocasForEvalOperations(Shuffle->getOperand(0), allocas);
      Cursor = Shuffle->getOperand(1);
      continue;
    }
    if (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(Cursor)) {
      Cursor = Extract->getOperand(0);
      continue;
    }
    if (LoadInst *Load = dyn_cast<LoadInst>(Cursor)) {
      Cursor = Load->getOperand(0);
      continue;
    }
    // Nothing further to follow.
    return;
  }
 }
 // EvaluateAttribute operations are translated by tracing their argument back
 // to LoadInput operations. Loads and stores that go through allocas get in
 // the way of that trace, so before the translation stage this routine
 // promotes the loads/stores of every alloca feeding such a call.
 void DxilGenerationPass::LegalizeEvalOperations(Module &M) {
  for (Function &Fn : M.getFunctionList()) {
    // Skip functions that are not HL operations.
    if (hlsl::GetHLOpcodeGroup(&Fn) == HLOpcodeGroup::NotHL)
      continue;

    // Gather all EvaluateAttribute calls made through this function.
    std::vector<CallInst *> EvalCalls;
    for (User *FnUser : Fn.users()) {
      CallInst *Call = dyn_cast<CallInst>(FnUser);
      if (!Call)
        continue;
      switch (static_cast<IntrinsicOp>(hlsl::GetHLOpcode(Call))) {
      case IntrinsicOp::IOP_EvaluateAttributeAtSample:
      case IntrinsicOp::IOP_EvaluateAttributeCentroid:
      case IntrinsicOp::IOP_EvaluateAttributeSnapped:
        EvalCalls.push_back(Call);
        break;
      default:
        break;
      }
    }
    if (EvalCalls.empty())
      continue;

    // Starting at each call, collect the allocas its argument depends on.
    std::unordered_set<AllocaInst *> EvalAllocas;
    for (CallInst *Call : EvalCalls)
      FindAllocasForEvalOperations(Call, EvalAllocas);

    // Promote the direct loads and stores of each collected alloca. The same
    // SSAUpdater instance is reused for every alloca of this function.
    SSAUpdater Updater;
    for (AllocaInst *Slot : EvalAllocas) {
      SmallVector<Instruction *, 4> Accesses;
      for (User *SlotUser : Slot->users()) {
        if (isa<LoadInst>(SlotUser) || isa<StoreInst>(SlotUser))
          Accesses.emplace_back(cast<Instruction>(SlotUser));
      }
      LoadAndStorePromoter(Accesses, Updater).run(Accesses);
    }
  }
 }
 ///////////////////////////////////////////////////////////////////////////////
 namespace {
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@ -604,50 +604,90 @@ Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
 }
-CallInst *ValidateLoadInput(Value *V) {
+bool IsValidLoadInput(Value *V) {
  // Must be load input.
  // TODO: report this error on front-end
  if (!isa<CallInst>(V)) {
    V->getContext().emitError("attribute evaluation can only be done on values "
                              "taken directly from inputs");
    return false;
  }
  CallInst *CI = cast<CallInst>(V);
  // Must be immediate.
  ConstantInt *opArg =
      cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
  DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
-  // TODO: report error on front-end.
+  if (op != DXIL::OpCode::LoadInput) {
-  // "attribute evaluation can only be done on values taken directly from inputs"
+    V->getContext().emitError("attribute evaluation can only be done on values "
-  DXASSERT_LOCALVAR(op, op == DXIL::OpCode::LoadInput, "must be load input");
+                              "taken directly from inputs");
-  return CI;
+    return false;
  }
  return true;
 }
 // Composes two shufflevector masks: every lane of curMask is used as an
 // index into prevMask, and the value found there becomes that lane of the
 // result.
 // Example: prevMask = (12,11,10,13), curMask = (3,1,0,2)
 //          result   = (13,11,12,10)
 // When curMask is null there is nothing to compose and prevMask is returned
 // unchanged.
 Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
  if (!curMask)
    return prevMask;

  const unsigned NumLanes =
      cast<VectorType>(curMask->getType())->getNumElements();
  SmallVector<uint32_t, 16> Composed;
  for (unsigned Lane = 0; Lane < NumLanes; ++Lane) {
    // Lane selector taken from the current mask ...
    ConstantInt *Selector =
        cast<ConstantInt>(curMask->getAggregateElement(Lane));
    // ... looked up in the previous mask.
    ConstantInt *Picked = cast<ConstantInt>(
        prevMask->getAggregateElement(Selector->getSExtValue()));
    Composed.emplace_back(Picked->getSExtValue());
  }
  return ConstantDataVector::get(curMask->getContext(), Composed);
 }
 Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
  Constant *shufMask = nullptr;
  if (V->getType()->isVectorTy()) {
-    // Must be insert element inst.
+    // Must be insert element inst. Keeping track of masks for shuffle vector
    Value *Vec = V;
-    if (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
+    while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
-      Value *src0 = shuf->getOperand(0);
+      shufMask = AccumulateMask(shufMask, shuf->getMask());
-      Value *src1 = shuf->getOperand(1);
+      Vec = shuf->getOperand(0);
      DXASSERT_LOCALVAR(src1, isa<UndefValue>(src1), "must be undef value");
      shufMask = shuf->getMask();
      Vec = src0;
    }
    // TODO: We are assuming that the operand of insertelement is a LoadInput.
    // This will fail on the case where we pass in matrix member using array subscript.
    while (!isa<UndefValue>(Vec)) {
      InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
      Vec = insertInst->getOperand(0);
      Value *Elt = insertInst->getOperand(1);
-      CallInst *CI = ValidateLoadInput(Elt);
+      if (IsValidLoadInput(Elt)) {
-      loadList.emplace_back(CI);
+        loadList.emplace_back(cast<CallInst>(Elt));
      }
    }
  } else {
-    CallInst *CI = ValidateLoadInput(V);
+    if (IsValidLoadInput(V)) {
-    loadList.emplace_back(CI);
+      loadList.emplace_back(cast<CallInst>(V));
    }
  }
  return shufMask;
 }
 // Determines the vector type to use for the temporary insertelement chain
 // built while lowering an EvaluateAttribute call.
 //
 // A swizzle can reduce the dimensionality of the value handed to the
 // intrinsic, but the temporary insertelement instructions should keep the
 // size of the original load input, so the type is derived from how `src`
 // was produced:
 //   - insertelement: the type of `src` itself;
 //   - shufflevector: the type of the shuffle's first operand.
 // For any other value an error is emitted on the context and nullptr is
 // returned, so callers must not dereference the result unchecked.
 Type *GetInsertElementTypeForEvaluate(Value *src) {
  // Only the kind of instruction matters here, so test with isa<> instead of
  // binding a dyn_cast result that is never used.
  if (isa<InsertElementInst>(src)) {
    return src->getType();
  }
  if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
    return SV->getOperand(0)->getType();
  }
  src->getContext().emitError("Invalid type call for EvaluateAttribute function");
  return nullptr;
 }
 Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                           HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *Ty = CI->getType();
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
@ -659,6 +699,7 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  OP::OpCode opcode = OP::OpCode::EvalSampleIndex; 
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(val);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
@ -679,7 +720,6 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
 Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Type *Ty = CI->getType();
  Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
  Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
  IRBuilder<> Builder(CI);
@ -693,7 +733,7 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  OP::OpCode opcode = OP::OpCode::EvalSnapped; 
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
-  
+  Type *Ty = GetInsertElementTypeForEvaluate(val);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
@ -710,13 +750,13 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  return result;
 }
 Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
  hlsl::OP *hlslOP = &helper.hlslOP;
  Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
  std::vector<CallInst*> loadList;
  Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
  Type *Ty = src->getType();
  unsigned size = loadList.size();
@ -726,6 +766,7 @@ Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
  Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
  Type *Ty = GetInsertElementTypeForEvaluate(src);
  Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
  Value *result = UndefValue::get(Ty);
--- a/tools/clang/test/CodeGenHLSL/evalInvalid.hlsl
+++ b/tools/clang/test/CodeGenHLSL/evalInvalid.hlsl
@ -0,0 +1,11 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // CHECK: attribute evaluation can only be done on values taken directly from inputs
 float4 color;
 // Negative test: EvaluateAttributeCentroid is applied to `color`, a
 // file-scope variable, instead of the semantic-bound parameter `a`. The
 // expected diagnostic is the one named in this file's FileCheck directive.
 float4 main(float4 a : A) : SV_Target
 {
  float4 r = EvaluateAttributeCentroid(color);
  return r;
 }
--- a/tools/clang/test/CodeGenHLSL/evalMat.hlsl
+++ b/tools/clang/test/CodeGenHLSL/evalMat.hlsl
@ -0,0 +1,10 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // CHECK: evalCentroid
 // Passes the whole float4x4 input to EvaluateAttributeCentroid and then
 // indexes the result with [0] to obtain the float4 that is returned.
 float4 main(float4x4 a : A) : SV_Target
 {
  float4 r = EvaluateAttributeCentroid(a)[0];
  return r;
 }
--- a/tools/clang/test/CodeGenHLSL/evalMatMember.hlsl
+++ b/tools/clang/test/CodeGenHLSL/evalMatMember.hlsl
@ -0,0 +1,10 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
 // CHECK: evalCentroid
 // Passes four elements picked out of the float4x4 input with the
 // _12_31_23_41 member swizzle to EvaluateAttributeCentroid.
 float4 main(float4x4 a : A) : SV_Target
 {
  float4 r = EvaluateAttributeCentroid(a._12_31_23_41);
  return r;
 }
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@ -420,6 +420,9 @@ public:
  TEST_METHOD(CodeGenEmptyStruct)
  TEST_METHOD(CodeGenEarlyDepthStencil)
  TEST_METHOD(CodeGenEval)
  TEST_METHOD(CodeGenEvalInvalid)
  TEST_METHOD(CodeGenEvalMat)
  TEST_METHOD(CodeGenEvalMatMember)
  TEST_METHOD(CodeGenEvalPos)
  TEST_METHOD(CodeGenExternRes)
  TEST_METHOD(CodeGenFloatCast)
@ -2410,6 +2413,18 @@ TEST_F(CompilerTest, CodeGenEval) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\eval.hlsl");
 }
 // Runs evalInvalid.hlsl through CodeGenTestCheck; that shader calls
 // EvaluateAttributeCentroid on a value that is not a shader input and its
 // CHECK line expects the matching diagnostic.
 TEST_F(CompilerTest, CodeGenEvalInvalid) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalInvalid.hlsl");
 }
 // Runs evalMat.hlsl through CodeGenTestCheck; that shader applies
 // EvaluateAttributeCentroid to a whole float4x4 input.
 TEST_F(CompilerTest, CodeGenEvalMat) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMat.hlsl");
 }
 // Runs evalMatMember.hlsl through CodeGenTestCheck; that shader applies
 // EvaluateAttributeCentroid to a matrix member swizzle.
 TEST_F(CompilerTest, CodeGenEvalMatMember) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMatMember.hlsl");
 }
 // Runs evalPos.hlsl through CodeGenTestCheck.
 TEST_F(CompilerTest, CodeGenEvalPos) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalPos.hlsl");
 }