From 47d21ad6c0e19eee4314741ad5f0135ce7d65394 Mon Sep 17 00:00:00 2001
From: Young Kim <youngseongkim93@gmail.com>
Date: Fri, 5 May 2017 15:10:46 -0700
Subject: [PATCH] Fix EvalauteAttribute Intrinsics (#275)

Currently the compiler crashes when we pass in matrix or vectors with reduced dimension from the original signature element for EvalAttribute functions. This change resolves this issue by replacing allocas before we translate these function calls from DXIR to DXIL and find LoadInputs to replace correctly.
---
 lib/HLSL/DxilGenerationPass.cpp               | 81 +++++++++++++++++-
 lib/HLSL/HLOperationLower.cpp                 | 85 ++++++++++++++-----
 tools/clang/test/CodeGenHLSL/evalInvalid.hlsl | 11 +++
 tools/clang/test/CodeGenHLSL/evalMat.hlsl     | 10 +++
 .../clang/test/CodeGenHLSL/evalMatMember.hlsl | 10 +++
 tools/clang/unittests/HLSL/CompilerTest.cpp   | 15 ++++
 6 files changed, 189 insertions(+), 23 deletions(-)
 create mode 100644 tools/clang/test/CodeGenHLSL/evalInvalid.hlsl
 create mode 100644 tools/clang/test/CodeGenHLSL/evalMat.hlsl
 create mode 100644 tools/clang/test/CodeGenHLSL/evalMatMember.hlsl
diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp
index 5442c1981..9724a04f3 100644
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@@ -256,7 +256,7 @@ public:
     // Load up debug information, to cross-reference values and the instructions
     // used to load them.
     m_HasDbgInfo = getDebugMetadataVersionFromModule(M) != 0;
-
+    LegalizeEvalOperations(M);
     if (!SM->IsCS()) {
       CreateDxilSignatures();
 
@@ -381,6 +381,10 @@ private:
   // For validation
   std::unordered_map<unsigned, std::unordered_set<unsigned> > m_InputSemanticsUsed,
     m_OutputSemanticsUsed[4], m_PatchConstantSemanticsUsed, m_OtherSemanticsUsed;
+
+  // For EvaluateAttribute operations.
+  void LegalizeEvalOperations(Module &M);
+  void FindAllocasForEvalOperations(Value *f, std::unordered_set<AllocaInst*> &allocas);
 };
 
 class SimplifyInst : public FunctionPass {
@@ -2704,6 +2708,81 @@ void DxilGenerationPass::UpdateStructTypeForLegacyLayout() {
   UpdateStructTypeForLegacyLayoutOnHLM(*m_pHLModule);
 }
 
+// Find allocas for EvaluateAttribute operations
+void DxilGenerationPass::FindAllocasForEvalOperations(
+    Value *val, std::unordered_set<AllocaInst *> &allocas) {
+  Value *CurVal = val;
+  while (!isa<AllocaInst>(CurVal)) {
+    if (CallInst *CI = dyn_cast<CallInst>(CurVal)) {
+      CurVal = CI->getOperand(HLOperandIndex::kUnaryOpSrc0Idx);
+    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(CurVal)) {
+      Value *arg0 =
+          IE->getOperand(0); // Could be another insertelement or undef
+      Value *arg1 = IE->getOperand(1);
+      FindAllocasForEvalOperations(arg0, allocas);
+      CurVal = arg1;
+    } else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+      Value *arg0 = SV->getOperand(0);
+      Value *arg1 = SV->getOperand(1);
+      FindAllocasForEvalOperations(
+          arg0, allocas); // Shuffle vector could come from different allocas
+      CurVal = arg1;
+    } else if (ExtractElementInst *EE = dyn_cast<ExtractElementInst>(CurVal)) {
+      CurVal = EE->getOperand(0);
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(CurVal)) {
+      CurVal = LI->getOperand(0);
+    } else {
+      break;
+    }
+  }
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(CurVal)) {
+    allocas.insert(AI);
+  }
+}
+
+// This is needed in order to translate EvaluateAttribute operations that traces
+// back to LoadInput operations during translation stage. Promoting load/store
+// instructions beforehand will allow us to easily trace back to loadInput from
+// function call.
+void DxilGenerationPass::LegalizeEvalOperations(Module &M) {
+  for (Function &F : M.getFunctionList()) {
+    hlsl::HLOpcodeGroup group = hlsl::GetHLOpcodeGroup(&F);
+    if (group != HLOpcodeGroup::NotHL) {
+      std::vector<CallInst *> EvalFunctionCalls;
+      // Find all EvaluateAttribute calls
+      for (User *U : F.users()) {
+        if (CallInst *CI = dyn_cast<CallInst>(U)) {
+          IntrinsicOp evalOp = static_cast<IntrinsicOp>(hlsl::GetHLOpcode(CI));
+          if (evalOp == IntrinsicOp::IOP_EvaluateAttributeAtSample ||
+            evalOp == IntrinsicOp::IOP_EvaluateAttributeCentroid ||
+            evalOp == IntrinsicOp::IOP_EvaluateAttributeSnapped) {
+            EvalFunctionCalls.push_back(CI);
+          }
+        }
+      }
+      if (EvalFunctionCalls.empty()) {
+        continue;
+      }
+      // Start from the call instruction, find all allocas that this call uses.
+      std::unordered_set<AllocaInst *> allocas;
+      for (CallInst *CI : EvalFunctionCalls) {
+        FindAllocasForEvalOperations(CI, allocas);
+      }
+      SSAUpdater SSA;
+      SmallVector<Instruction *, 4> Insts;
+      for (AllocaInst *AI : allocas) {
+        for (User *user : AI->users()) {
+          if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
+            Insts.emplace_back(cast<Instruction>(user));
+          }
+        }
+        LoadAndStorePromoter(Insts, SSA).run(Insts);
+        Insts.clear();
+      }
+    }
+  }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 namespace {
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index 206e9dd0e..0c892fbf4 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -604,50 +604,90 @@ Value *TranslateAddUint64(CallInst *CI, IntrinsicOp IOP,
 }
 
 
-CallInst *ValidateLoadInput(Value *V) {
+bool IsValidLoadInput(Value *V) {
   // Must be load input.
+  // TODO: report this error on front-end
+  if (!isa<CallInst>(V)) {
+    V->getContext().emitError("attribute evaluation can only be done on values "
+                              "taken directly from inputs");
+    return false;
+  }
   CallInst *CI = cast<CallInst>(V);
   // Must be immediate.
   ConstantInt *opArg =
       cast<ConstantInt>(CI->getArgOperand(DXIL::OperandIndex::kOpcodeIdx));
   DXIL::OpCode op = static_cast<DXIL::OpCode>(opArg->getLimitedValue());
-  // TODO: report error on front-end.
-  // "attribute evaluation can only be done on values taken directly from inputs"
-  DXASSERT_LOCALVAR(op, op == DXIL::OpCode::LoadInput, "must be load input");
-  return CI;
+  if (op != DXIL::OpCode::LoadInput) {
+    V->getContext().emitError("attribute evaluation can only be done on values "
+                              "taken directly from inputs");
+    return false;
+  }
+  return true;
+}
+
+// Apply current shuffle vector mask on top of previous shuffle mask.
+// For example, if previous mask is (12,11,10,13) and current mask is (3,1,0,2)
+// new mask would be (13,11,12,10)
+Constant *AccumulateMask(Constant *curMask, Constant *prevMask) {
+  if (curMask == nullptr) {
+    return prevMask;
+  }
+  unsigned size = cast<VectorType>(curMask->getType())->getNumElements();
+  SmallVector<uint32_t, 16> Elts;
+  for (unsigned i = 0; i != size; ++i) {
+    ConstantInt *Index = cast<ConstantInt>(curMask->getAggregateElement(i));
+    ConstantInt *IVal =
+        cast<ConstantInt>(prevMask->getAggregateElement(Index->getSExtValue()));
+    Elts.emplace_back(IVal->getSExtValue());
+  }
+  return ConstantDataVector::get(curMask->getContext(), Elts);
 }
 
 Constant *GetLoadInputsForEvaluate(Value *V, std::vector<CallInst*> &loadList) {
   Constant *shufMask = nullptr;
   if (V->getType()->isVectorTy()) {
-    // Must be insert element inst.
+    // Must be insert element inst. Keeping track of masks for shuffle vector
     Value *Vec = V;
-    if (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
-      Value *src0 = shuf->getOperand(0);
-      Value *src1 = shuf->getOperand(1);
-      DXASSERT_LOCALVAR(src1, isa<UndefValue>(src1), "must be undef value");
-      shufMask = shuf->getMask();
-      Vec = src0;
+    while (ShuffleVectorInst *shuf = dyn_cast<ShuffleVectorInst>(Vec)) {
+      shufMask = AccumulateMask(shufMask, shuf->getMask());
+      Vec = shuf->getOperand(0);
     }
 
+    // TODO: We are assuming that the operand of insertelement is a LoadInput.
+    // This will fail on the case where we pass in matrix member using array subscript.
     while (!isa<UndefValue>(Vec)) {
       InsertElementInst *insertInst = cast<InsertElementInst>(Vec);
       Vec = insertInst->getOperand(0);
       Value *Elt = insertInst->getOperand(1);
-      CallInst *CI = ValidateLoadInput(Elt);
-      loadList.emplace_back(CI);
+      if (IsValidLoadInput(Elt)) {
+        loadList.emplace_back(cast<CallInst>(Elt));
+      }
     }
   } else {
-    CallInst *CI = ValidateLoadInput(V);
-    loadList.emplace_back(CI);
+    if (IsValidLoadInput(V)) {
+      loadList.emplace_back(cast<CallInst>(V));
+    }
   }
   return shufMask;
 }
 
+// Swizzle could reduce the dimensionality of the Type, but
+// for temporary insertelement instructions should maintain the existing size of the loadinput.
+// So we have to analyze the type of src in order to determine the actual size required.
+Type *GetInsertElementTypeForEvaluate(Value *src) {
+  if (InsertElementInst *IE = dyn_cast<InsertElementInst>(src)) {
+    return src->getType();
+  }
+  else if (ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(src)) {
+    return SV->getOperand(0)->getType();
+  }
+  src->getContext().emitError("Invalid type call for EvaluateAttribute function");
+  return nullptr;
+}
+
 Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                            HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
-  Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
   Value *sampleIdx = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
   IRBuilder<> Builder(CI);
@@ -659,7 +699,8 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   OP::OpCode opcode = OP::OpCode::EvalSampleIndex; 
 
   Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
-  
+  Type *Ty = GetInsertElementTypeForEvaluate(val);
+
   Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
     
   Value *result = UndefValue::get(Ty);
@@ -679,7 +720,6 @@ Value *TranslateEvalSample(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
 Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                             HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
-  Type *Ty = CI->getType();
   Value *val = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc0Idx);
   Value *offset = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
   IRBuilder<> Builder(CI);
@@ -693,7 +733,7 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   OP::OpCode opcode = OP::OpCode::EvalSnapped; 
 
   Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
-  
+  Type *Ty = GetInsertElementTypeForEvaluate(val);
   Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
     
   Value *result = UndefValue::get(Ty);
@@ -710,13 +750,13 @@ Value *TranslateEvalSnapped(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   return result;
 }
 
+
 Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
                             HLOperationLowerHelper &helper,  HLObjectOperationLowerHelper *pObjHelper, bool &Translated) {
   hlsl::OP *hlslOP = &helper.hlslOP;
   Value *src = CI->getArgOperand(DXIL::OperandIndex::kUnarySrc0OpIdx);
   std::vector<CallInst*> loadList;
   Constant *shufMask = GetLoadInputsForEvaluate(src, loadList);
-  Type *Ty = src->getType();
 
   unsigned size = loadList.size();
 
@@ -725,7 +765,8 @@ Value *TranslateEvalCentroid(CallInst *CI, IntrinsicOp IOP, OP::OpCode op,
   OP::OpCode opcode = OP::OpCode::EvalCentroid; 
 
   Value *opArg = hlslOP->GetU32Const((unsigned)opcode);
-  
+
+  Type *Ty = GetInsertElementTypeForEvaluate(src);
   Function *evalFunc = hlslOP->GetOpFunc(opcode, Ty->getScalarType());
     
   Value *result = UndefValue::get(Ty);
diff --git a/tools/clang/test/CodeGenHLSL/evalInvalid.hlsl b/tools/clang/test/CodeGenHLSL/evalInvalid.hlsl
new file mode 100644
index 000000000..626799345
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/evalInvalid.hlsl
@@ -0,0 +1,11 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: attribute evaluation can only be done on values taken directly from inputs
+
+float4 color;
+float4 main(float4 a : A) : SV_Target
+{
+  float4 r = EvaluateAttributeCentroid(color);
+
+  return r;
+}
\ No newline at end of file
diff --git a/tools/clang/test/CodeGenHLSL/evalMat.hlsl b/tools/clang/test/CodeGenHLSL/evalMat.hlsl
new file mode 100644
index 000000000..8983c68d3
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/evalMat.hlsl
@@ -0,0 +1,10 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: evalCentroid
+
+float4 main(float4x4 a : A) : SV_Target
+{
+  float4 r = EvaluateAttributeCentroid(a)[0];
+
+  return r;
+}
\ No newline at end of file
diff --git a/tools/clang/test/CodeGenHLSL/evalMatMember.hlsl b/tools/clang/test/CodeGenHLSL/evalMatMember.hlsl
new file mode 100644
index 000000000..7b7228196
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/evalMatMember.hlsl
@@ -0,0 +1,10 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: evalCentroid
+
+float4 main(float4x4 a : A) : SV_Target
+{
+  float4 r = EvaluateAttributeCentroid(a._12_31_23_41);
+
+  return r;
+}
\ No newline at end of file
diff --git a/tools/clang/unittests/HLSL/CompilerTest.cpp b/tools/clang/unittests/HLSL/CompilerTest.cpp
index 63a3c36cd..3e1eac75d 100644
--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@@ -420,6 +420,9 @@ public:
   TEST_METHOD(CodeGenEmptyStruct)
   TEST_METHOD(CodeGenEarlyDepthStencil)
   TEST_METHOD(CodeGenEval)
+  TEST_METHOD(CodeGenEvalInvalid)
+  TEST_METHOD(CodeGenEvalMat)
+  TEST_METHOD(CodeGenEvalMatMember)
   TEST_METHOD(CodeGenEvalPos)
   TEST_METHOD(CodeGenExternRes)
   TEST_METHOD(CodeGenFloatCast)
@@ -2410,6 +2413,18 @@ TEST_F(CompilerTest, CodeGenEval) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\eval.hlsl");
 }
 
+TEST_F(CompilerTest, CodeGenEvalInvalid) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalInvalid.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenEvalMat) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMat.hlsl");
+}
+
+TEST_F(CompilerTest, CodeGenEvalMatMember) {
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\evalMatMember.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenEvalPos) {
   CodeGenTestCheck(L"..\\CodeGenHLSL\\evalPos.hlsl");
 }