Fix RayQuery allocation for CSE, DCE, statics, arrays, and lifetimes (#2469)
Fixes problems like:
- extra AllocateRayQuery calls, or calls at an improper location (for lifetime)
- lack of proper array support
- static global RayQuery

RayQuery allocation changes in this commit:
- Add a constructor to RayQuery
- Set the init sequence to use the constructor in InitializeInitSequenceForHLSL, just for RayQuery
- For arrays: modify EmitCXXAggrConstructorCall to
  - loop over an index instead of a pointer, to allow SROA of the RayQuery struct
  - mark the loop as HlslForceUnroll
- Add a hidden flag for HL intrinsics to allow internal intrinsics not produced by HLSL directly
  - mangle the name so it can't be matched during parse
- Add a hidden HL AllocateRayQuery intrinsic
- Translate the constructor call on ptr to an HL AllocateRayQuery intrinsic call producing an i32 handle during FinishCodeGen
- Translate the RayQuery ptr to a load of the i32 handle value for intrinsic methods during SROA_HLSL
- Flatten RayDesc for TraceRayInline (otherwise /Od fails validation since the RayDesc type may still be present)
- No longer skip RayQuery for SROA_HLSL
- Update lowering for AllocateRayQuery, the i32 handle, and the flattened RayDesc
- Remove the ReadNone attribute from AllocateRayQuery to prevent incorrect CSE optimizations
- Manually clean up unused RayQuery allocations
This commit is contained in:
Родитель
9219b1fd9f
Коммит
2a01c58f73
|
@ -349,6 +349,9 @@ const unsigned kCreateHandleIndexOpIdx = 2; // Only for array of cbuffer.
|
|||
const unsigned kTraceRayRayDescOpIdx = 7;
|
||||
const unsigned kTraceRayPayLoadOpIdx = 8;
|
||||
|
||||
// CallShader.
|
||||
const unsigned kCallShaderPayloadOpIdx = 2;
|
||||
|
||||
// TraceRayInline.
|
||||
const unsigned kTraceRayInlineRayDescOpIdx = 5;
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import hctdb_instrhelp
|
|||
IOP_AddUint64,
|
||||
IOP_AllMemoryBarrier,
|
||||
IOP_AllMemoryBarrierWithGroupSync,
|
||||
IOP_AllocateRayQuery,
|
||||
IOP_CallShader,
|
||||
IOP_CheckAccessFullyMapped,
|
||||
IOP_D3DCOLORtoUBYTE4,
|
||||
|
|
|
@ -342,7 +342,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
|
|||
{ OC::WriteSamplerFeedbackGrad, "WriteSamplerFeedbackGrad", OCC::WriteSamplerFeedbackGrad, "writeSamplerFeedbackGrad", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
|
||||
|
||||
// Inline Ray Query void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute
|
||||
{ OC::AllocateRayQuery, "AllocateRayQuery", OCC::AllocateRayQuery, "allocateRayQuery", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, },
|
||||
{ OC::AllocateRayQuery, "AllocateRayQuery", OCC::AllocateRayQuery, "allocateRayQuery", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
|
||||
{ OC::RayQuery_TraceRayInline, "RayQuery_TraceRayInline", OCC::RayQuery_TraceRayInline, "rayQuery_TraceRayInline", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
|
||||
{ OC::RayQuery_Proceed, "RayQuery_Proceed", OCC::RayQuery_Proceed, "rayQuery_Proceed", { false, false, false, false, true, false, false, false, false, false, false}, Attribute::None, },
|
||||
{ OC::RayQuery_Abort, "RayQuery_Abort", OCC::RayQuery_Abort, "rayQuery_Abort", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
|
||||
|
|
|
@ -407,6 +407,9 @@ public:
|
|||
// Clear intermediate options that shouldn't be in the final DXIL
|
||||
DM.ClearIntermediateOptions();
|
||||
|
||||
// Remove unused AllocateRayQuery calls
|
||||
RemoveUnusedRayQuery(M);
|
||||
|
||||
if (IsLib && DXIL::CompareVersions(ValMajor, ValMinor, 1, 4) <= 0) {
|
||||
// 1.4 validator requires function annotations for all functions
|
||||
AddFunctionAnnotationForInitializers(M, DM);
|
||||
|
@ -577,6 +580,26 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Erases every AllocateRayQuery call whose result has no users, then erases
// the AllocateRayQuery function itself if nothing uses it afterwards.
void RemoveUnusedRayQuery(Module &M) {
  hlsl::OP *pOP = M.GetDxilModule().GetOP();
  llvm::Function *pAllocFn = pOP->GetOpFunc(
      DXIL::OpCode::AllocateRayQuery, Type::getVoidTy(M.getContext()));

  // Collect first and erase afterwards, so the user list is not modified
  // while it is being walked.
  SmallVector<CallInst*, 4> UnusedCalls;
  for (auto pUser : pAllocFn->users()) {
    CallInst *pCall = dyn_cast<CallInst>(pUser);
    if (!pCall || !pCall->user_empty())
      continue;
    UnusedCalls.emplace_back(pCall);
  }

  for (auto pCall : UnusedCalls)
    pCall->eraseFromParent();

  if (!pAllocFn->user_empty())
    return;
  pAllocFn->eraseFromParent();
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -4800,56 +4800,16 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
|
|||
return Builder.CreateCall(F, Args);
|
||||
}
|
||||
|
||||
// For every function definition that is not an HL operation, scans the entry
// block for allocas whose type (after stripping array levels) is a RayQuery
// struct. Right after each such alloca it emits an AllocateRayQuery call,
// passing the struct's integral template argument as the ray flags, and
// stores the returned handle into field 0 of the allocated object.
void AllocateRayQueryObjects(llvm::Module *M, HLOperationLowerHelper &helper) {
  hlsl::OP &hlslOP = helper.hlslOP;
  Constant *zero = hlslOP.GetI32Const(0);
  DXIL::OpCode opcode = DXIL::OpCode::AllocateRayQuery;
  llvm::Value *opArg = hlslOP.GetU32Const(static_cast<unsigned>(opcode));

  for (Function &F : M->functions()) {
    // Only bodies of non-HL functions are processed.
    if (F.isDeclaration())
      continue;
    if (F.isIntrinsic())
      continue;
    if (GetHLOpcodeGroup(&F) != HLOpcodeGroup::NotHL)
      continue;

    BasicBlock &entry = F.getEntryBlock();
    IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&entry));

    BasicBlock::iterator it = entry.begin();
    BasicBlock::iterator end = entry.end();
    while (it != end) {
      // Advance before inspecting the instruction, so that the instructions
      // inserted after it below do not disturb the walk.
      Instruction *inst = it++;
      AllocaInst *AI = dyn_cast<AllocaInst>(inst);
      if (!AI)
        continue;

      // Peel array levels to reach the underlying element type.
      llvm::Type *allocaTy = AI->getAllocatedType();
      llvm::Type *elementTy = allocaTy;
      for (; elementTy->isArrayTy();
           elementTy = elementTy->getArrayElementType()) {
      }
      if (!dxilutil::IsHLSLRayQueryType(elementTy))
        continue;

      DxilStructAnnotation *SA =
          helper.dxilTypeSys.GetStructAnnotation(cast<StructType>(elementTy));
      DXASSERT(SA, "otherwise, could not find type annoation for RayQuery specialization");
      DXASSERT(SA->GetNumTemplateArgs() == 1 && SA->GetTemplateArgAnnotation(0).IsIntegral(),
               "otherwise, RayQuery has changed, or lacks template args");

      Builder.SetInsertPoint(AI->getNextNode());
      DXASSERT(!allocaTy->isArrayTy(), "Array not handled yet");

      llvm::Function *AllocFn =
          hlslOP.GetOpFunc(DXIL::OpCode::AllocateRayQuery, Builder.getVoidTy());
      llvm::Value *rayFlags = ConstantInt::get(
          helper.i32Ty,
          APInt(32, SA->GetTemplateArgAnnotation(0).GetIntegral()));
      llvm::CallInst *handle =
          Builder.CreateCall(AllocFn, {opArg, rayFlags}, "hRayQuery");

      // The handle lives in the first field of the RayQuery struct.
      llvm::Value *handlePtr = Builder.CreateGEP(AI, {zero, zero});
      Builder.CreateStore(handle, handlePtr);
    }
  }
}
|
||||
// RayQuery methods
|
||||
|
||||
// Emits, immediately before CI, a GEP to field 0 of the object passed as
// argument operand 1 of CI and a load from that address; returns the loaded
// value (the handle, going by this helper's name).
static Value* TranslateThisPointerToi32Handle(CallInst*CI, hlsl::OP *hlslOP)
{
  Constant *zero = hlslOP->GetI32Const(0);
  IRBuilder<> Builder(CI);
  Value *handlePtr = Builder.CreateGEP(CI->getArgOperand(1), {zero, zero});
  return Builder.CreateLoad(handlePtr);
}
|
||||
// Lowers the HL AllocateRayQuery intrinsic call to the DXIL operation given
// by `opcode`, forwarding operand 1 of the HL call as the only real argument.
// IOP, pObjHelper and Translated are not used here.
Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  // Slot 0 is left null; presumably TrivialDxilOperation fills in the opcode
  // there -- TODO confirm against its definition.
  Value *flagsArg = CI->getOperand(1);
  Value *dxilArgs[] = {nullptr, flagsArg};
  hlsl::OP *pOP = &helper.hlslOP;
  return TrivialDxilOperation(opcode, dxilArgs, helper.voidTy, CI, pOP);
}
|
||||
|
||||
Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
|
||||
HLOperationLowerHelper &helper,
|
||||
|
@ -4857,49 +4817,35 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
|
|||
bool &Translated) {
|
||||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
|
||||
Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx);
|
||||
|
||||
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
|
||||
|
||||
Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
|
||||
Args[0] = opArg;
|
||||
|
||||
// Translate this pointer to i32 handle value
|
||||
Args[1] = TranslateThisPointerToi32Handle(CI, hlslOP);
|
||||
|
||||
for (unsigned i = 2; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
|
||||
for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
|
||||
Args[i] = CI->getArgOperand(i);
|
||||
}
|
||||
|
||||
IRBuilder<> Builder(CI);
|
||||
unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx;
|
||||
unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
|
||||
|
||||
// struct RayDesc
|
||||
//{
|
||||
// float3 Origin;
|
||||
// float TMin;
|
||||
// float3 Direction;
|
||||
// float TMax;
|
||||
//};
|
||||
IRBuilder<> Builder(CI);
|
||||
Value *zeroIdx = hlslOP->GetU32Const(0);
|
||||
Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
|
||||
origin = Builder.CreateLoad(origin);
|
||||
unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
|
||||
Value *origin = CI->getArgOperand(hlIndex++);
|
||||
Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
|
||||
Args[index++] = Builder.CreateExtractElement(origin, 1);
|
||||
Args[index++] = Builder.CreateExtractElement(origin, 2);
|
||||
|
||||
Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
|
||||
tmin = Builder.CreateLoad(tmin);
|
||||
Args[index++] = tmin;
|
||||
|
||||
Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
|
||||
direction = Builder.CreateLoad(direction);
|
||||
|
||||
// float TMin;
|
||||
Args[index++] = CI->getArgOperand(hlIndex++);
|
||||
// float3 Direction;
|
||||
Value *direction = CI->getArgOperand(hlIndex++);
|
||||
Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
|
||||
Args[index++] = Builder.CreateExtractElement(direction, 1);
|
||||
Args[index++] = Builder.CreateExtractElement(direction, 2);
|
||||
|
||||
Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
|
||||
tmax = Builder.CreateLoad(tmax);
|
||||
Args[index++] = tmax;
|
||||
// float TMax;
|
||||
Args[index++] = CI->getArgOperand(hlIndex++);
|
||||
//};
|
||||
|
||||
DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp);
|
||||
|
||||
|
@ -4915,8 +4861,9 @@ Value *TranslateCommitProceduralPrimitiveHit(CallInst *CI, IntrinsicOp IOP, OP::
|
|||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
|
||||
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
|
||||
Value *Args[] = {opArg,TranslateThisPointerToi32Handle(CI, hlslOP),THit};
|
||||
Value *Args[] = {opArg, handle, THit};
|
||||
|
||||
IRBuilder<> Builder(CI);
|
||||
Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
|
||||
|
@ -4931,13 +4878,14 @@ Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode
|
|||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
|
||||
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
|
||||
Value *Args[] = {opArg,TranslateThisPointerToi32Handle(CI, hlslOP)};
|
||||
Value *Args[] = {opArg, handle};
|
||||
|
||||
IRBuilder<> Builder(CI);
|
||||
Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
|
||||
|
||||
return Builder.CreateCall(F, Args);
|
||||
return Builder.CreateCall(F, {opArg, handle});
|
||||
}
|
||||
|
||||
Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
|
||||
|
@ -4946,7 +4894,7 @@ Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::Op
|
|||
bool &Translated) {
|
||||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
VectorType *Ty = cast<VectorType>(CI->getType());
|
||||
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
|
||||
Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
|
||||
uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
|
||||
|
@ -4962,7 +4910,7 @@ Value *TranslateRayQueryTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp I
|
|||
bool &Translated) {
|
||||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
VectorType *Ty = cast<VectorType>(CI->getType());
|
||||
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
|
||||
Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
|
||||
uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
|
||||
|
@ -4978,7 +4926,7 @@ Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode o
|
|||
bool &Translated) {
|
||||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
VectorType *Ty = cast<VectorType>(CI->getType());
|
||||
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
uint8_t elementVals[] = {0, 1};
|
||||
Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
|
||||
Value *retVal =
|
||||
|
@ -4992,7 +4940,7 @@ Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode o
|
|||
bool &Translated) {
|
||||
hlsl::OP *hlslOP = &helper.hlslOP;
|
||||
VectorType *Ty = cast<VectorType>(CI->getType());
|
||||
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
|
||||
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
|
||||
uint8_t elementVals[] = {0, 1, 2};
|
||||
Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
|
||||
Value *retVal =
|
||||
|
@ -5155,6 +5103,7 @@ IntrinsicLower gLowerTable[] = {
|
|||
{IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
|
||||
{IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
|
||||
{IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
|
||||
{IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery},
|
||||
{IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
|
||||
{IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
|
||||
{IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
|
||||
|
@ -7739,8 +7688,6 @@ void TranslateBuiltinOperations(
|
|||
|
||||
Module *M = HLM.GetModule();
|
||||
|
||||
AllocateRayQueryObjects(M, helper);
|
||||
|
||||
SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
|
||||
|
||||
// generate dxil operation
|
||||
|
|
|
@ -2507,10 +2507,53 @@ void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
|
|||
}
|
||||
}
|
||||
|
||||
// Flatten matching OldVal arg to NewElts, optionally loading values (loadElts).
|
||||
// Does not replace or clean up old CallInst.
|
||||
static CallInst *CreateFlattenedHLIntrinsicCall(
|
||||
CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
|
||||
HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
|
||||
Function *F = CI->getCalledFunction();
|
||||
DXASSERT_NOMSG(group == HLOpcodeGroup::HLIntrinsic);
|
||||
unsigned opcode = GetHLOpcode(CI);
|
||||
IRBuilder<> Builder(CI);
|
||||
|
||||
SmallVector<Value *, 4> flatArgs;
|
||||
for (Value *arg : CI->arg_operands()) {
|
||||
if (arg == OldVal) {
|
||||
for (Value *Elt : NewElts) {
|
||||
if (loadElts && Elt->getType()->isPointerTy())
|
||||
Elt = Builder.CreateLoad(Elt);
|
||||
flatArgs.emplace_back(Elt);
|
||||
}
|
||||
} else
|
||||
flatArgs.emplace_back(arg);
|
||||
}
|
||||
|
||||
SmallVector<Type *, 4> flatParamTys;
|
||||
for (Value *arg : flatArgs)
|
||||
flatParamTys.emplace_back(arg->getType());
|
||||
FunctionType *flatFuncTy =
|
||||
FunctionType::get(CI->getType(), flatParamTys, false);
|
||||
Function *flatF =
|
||||
GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
|
||||
|
||||
return Builder.CreateCall(flatF, flatArgs);
|
||||
}
|
||||
|
||||
// Creates the flattened replacement for CI (see
// CreateFlattenedHLIntrinsicCall), redirects all uses of CI to it, and
// overwrites every operand of CI with undef. CI is left in place; the caller
// is responsible for deleting it. Returns the new call.
static CallInst *RewriteWithFlattenedHLIntrinsicCall(
    CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
  CallInst *replacement =
      CreateFlattenedHLIntrinsicCall(CI, OldVal, NewElts, loadElts);
  CI->replaceAllUsesWith(replacement);

  // Drop the old call's operands so it is not picked up for translation again.
  for (unsigned i = 0, e = CI->getNumOperands(); i != e; ++i) {
    Value *old = CI->getOperand(i);
    CI->setOperand(i, UndefValue::get(old->getType()));
  }
  return replacement;
}
|
||||
|
||||
/// RewriteCall - Replace OldVal with flattened NewElts in CallInst.
|
||||
void SROA_Helper::RewriteCall(CallInst *CI) {
|
||||
HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
|
||||
Function *F = CI->getCalledFunction();
|
||||
if (group != HLOpcodeGroup::NotHL) {
|
||||
unsigned opcode = GetHLOpcode(CI);
|
||||
if (group == HLOpcodeGroup::HLIntrinsic) {
|
||||
|
@ -2519,36 +2562,9 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
|
|||
case IntrinsicOp::MOP_Append: {
|
||||
// Buffer Append already expand in code gen.
|
||||
// Must be OutputStream Append here.
|
||||
SmallVector<Value *, 4> flatArgs;
|
||||
for (Value *arg : CI->arg_operands()) {
|
||||
if (arg == OldVal) {
|
||||
// Flatten to arg.
|
||||
// Every Elt has a pointer type.
|
||||
// For Append, it's not a problem.
|
||||
for (Value *Elt : NewElts)
|
||||
flatArgs.emplace_back(Elt);
|
||||
} else
|
||||
flatArgs.emplace_back(arg);
|
||||
}
|
||||
|
||||
SmallVector<Type *, 4> flatParamTys;
|
||||
for (Value *arg : flatArgs)
|
||||
flatParamTys.emplace_back(arg->getType());
|
||||
// Don't need flat return type for Append.
|
||||
FunctionType *flatFuncTy =
|
||||
FunctionType::get(CI->getType(), flatParamTys, false);
|
||||
Function *flatF =
|
||||
GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
|
||||
IRBuilder<> Builder(CI);
|
||||
Builder.CreateCall(flatF, flatArgs);
|
||||
|
||||
// Append returns void, so it's not used by other instructions
|
||||
// and we don't need to replace it with flatCI.
|
||||
// However, we don't want to visit the same append again
|
||||
// when SROA'ing other arguments, as that would be O(n^2)
|
||||
// and we would attempt double-deleting the original call.
|
||||
for (auto& opit : CI->operands())
|
||||
opit.set(UndefValue::get(opit->getType()));
|
||||
// Every Elt has a pointer type.
|
||||
// For Append, this is desired, so don't load.
|
||||
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/false);
|
||||
DeadInsts.push_back(CI);
|
||||
} break;
|
||||
case IntrinsicOp::IOP_TraceRay: {
|
||||
|
@ -2569,17 +2585,32 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
|
|||
/*bIn*/ true, /*bOut*/ false);
|
||||
} break;
|
||||
case IntrinsicOp::IOP_CallShader: {
|
||||
RewriteCallArg(CI, HLOperandIndex::kBinaryOpSrc1Idx,
|
||||
RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx,
|
||||
/*bIn*/ true, /*bOut*/ true);
|
||||
} break;
|
||||
case IntrinsicOp::MOP_TraceRayInline: {
|
||||
if (OldVal ==
|
||||
CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) {
|
||||
RewriteCallArg(CI, HLOperandIndex::kTraceRayInlineRayDescOpIdx,
|
||||
/*bIn*/ true, /*bOut*/ false);
|
||||
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
|
||||
DeadInsts.push_back(CI);
|
||||
break;
|
||||
}
|
||||
} break;
|
||||
}
|
||||
__fallthrough;
|
||||
default:
|
||||
// RayQuery this pointer replacement.
|
||||
if (OldVal->getType()->isPointerTy() &&
|
||||
CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx &&
|
||||
OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) &&
|
||||
dxilutil::IsHLSLRayQueryType(
|
||||
OldVal->getType()->getPointerElementType())) {
|
||||
// For RayQuery methods, we want to replace the RayQuery this pointer
|
||||
// with a load and use of the underlying handle value.
|
||||
// This will allow elimination of RayQuery types earlier.
|
||||
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
|
||||
DeadInsts.push_back(CI);
|
||||
break;
|
||||
}
|
||||
DXASSERT(0, "cannot flatten hlsl intrinsic.");
|
||||
}
|
||||
}
|
||||
|
@ -2722,7 +2753,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
|
|||
|
||||
if (StructType *ST = dyn_cast<StructType>(Ty)) {
|
||||
// Skip HLSL object types and RayQuery.
|
||||
if (dxilutil::IsHLSLObjectType(ST) || dxilutil::IsHLSLRayQueryType(ST)) {
|
||||
if (dxilutil::IsHLSLObjectType(ST)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -4647,8 +4678,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
|
|||
IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
|
||||
|
||||
// Lower resource type to handle ty.
|
||||
if (dxilutil::IsHLSLObjectType(Ty) &&
|
||||
!HLModule::IsStreamOutputPtrType(V->getType())) {
|
||||
if (dxilutil::IsHLSLResourceType(Ty)) {
|
||||
Value *Res = V;
|
||||
if (!bOut) {
|
||||
Value *LdRes = Builder.CreateLoad(Res);
|
||||
|
@ -4668,7 +4698,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
|
|||
arraySize *= AT->getArrayNumElements();
|
||||
AT = AT->getArrayElementType();
|
||||
}
|
||||
if (dxilutil::IsHLSLObjectType(AT)) {
|
||||
if (dxilutil::IsHLSLResourceType(AT)) {
|
||||
Value *Res = V;
|
||||
Type *Ty = ArrayType::get(HandleTy, arraySize);
|
||||
V = AllocaBuilder.CreateAlloca(Ty);
|
||||
|
|
|
@ -322,6 +322,7 @@ clang::CXXRecordDecl* DeclareTemplateTypeWithHandle(
|
|||
|
||||
clang::CXXRecordDecl* DeclareUIntTemplatedTypeWithHandle(
|
||||
clang::ASTContext& context, llvm::StringRef typeName, llvm::StringRef templateParamName);
|
||||
clang::CXXRecordDecl* DeclareRayQueryType(clang::ASTContext& context);
|
||||
|
||||
/// <summary>Create a function template declaration for the specified method.</summary>
|
||||
/// <param name="context">AST context in which to work.</param>
|
||||
|
@ -382,6 +383,7 @@ unsigned GetHLSLOutputPatchCount(clang::QualType type);
|
|||
bool IsHLSLSubobjectType(clang::QualType type);
|
||||
bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind,
|
||||
DXIL::HitGroupType &ghType);
|
||||
bool IsHLSLRayQueryType(clang::QualType type);
|
||||
|
||||
bool IsArrayConstantStringType(const clang::QualType type);
|
||||
bool IsPointerStringType(const clang::QualType type);
|
||||
|
|
|
@ -39,6 +39,7 @@ static const bool DelayTypeCreationTrue = true; // delay type creation for a d
|
|||
static const SourceLocation NoLoc; // no source location attribution available
|
||||
static const bool InlineFalse = false; // namespace is not an inline namespace
|
||||
static const bool InlineSpecifiedFalse = false; // function was not specified as inline
|
||||
static const bool ExplicitFalse = false; // constructor was not specified as explicit
|
||||
static const bool IsConstexprFalse = false; // function is not constexpr
|
||||
static const bool VirtualFalse = false; // whether the base class is declared 'virtual'
|
||||
static const bool BaseClassFalse = false; // whether the base class is declared as 'class' (vs. 'struct')
|
||||
|
@ -733,6 +734,28 @@ void AssociateParametersToFunctionPrototype(
|
|||
}
|
||||
}
|
||||
|
||||
// Creates a public, non-explicit, non-constexpr constructor declaration for
// recordDecl with no source location.
//
// context         - AST context used to build the types and the declaration.
// recordDecl      - record that owns the constructor; must not be null.
// resultType      - result type used for the constructor's function type.
// args            - parameter types of the constructor.
// declarationName - name to give the declaration.
// isConst         - when true, the function type carries the const qualifier.
// constructorDecl - receives the created declaration; must not be null.
// tinfo           - receives the trivial TypeSourceInfo for the function
//                   type; must not be null.
//
// The declaration is not added to recordDecl here; the caller does that.
static void CreateConstructorDeclaration(
    ASTContext &context, _In_ CXXRecordDecl *recordDecl, QualType resultType,
    ArrayRef<QualType> args, DeclarationName declarationName, bool isConst,
    _Out_ CXXConstructorDecl **constructorDecl, _Out_ TypeSourceInfo **tinfo) {
  DXASSERT_NOMSG(recordDecl != nullptr);
  DXASSERT_NOMSG(constructorDecl != nullptr);
  // tinfo is written through below, so it needs the same guarantee.
  DXASSERT_NOMSG(tinfo != nullptr);

  FunctionProtoType::ExtProtoInfo functionExtInfo;
  functionExtInfo.TypeQuals = isConst ? Qualifiers::Const : 0;
  QualType functionQT = context.getFunctionType(
      resultType, args, functionExtInfo, ArrayRef<ParameterModifier>());
  DeclarationNameInfo declNameInfo(declarationName, NoLoc);
  *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc);
  DXASSERT_NOMSG(*tinfo != nullptr);
  *constructorDecl = CXXConstructorDecl::Create(
      context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo,
      StorageClass::SC_None, ExplicitFalse, InlineSpecifiedFalse, IsConstexprFalse);
  DXASSERT_NOMSG(*constructorDecl != nullptr);
  (*constructorDecl)->setLexicalDeclContext(recordDecl);
  (*constructorDecl)->setAccess(AccessSpecifier::AS_public);
}
|
||||
|
||||
static void CreateObjectFunctionDeclaration(
|
||||
ASTContext &context, _In_ CXXRecordDecl *recordDecl, QualType resultType,
|
||||
ArrayRef<QualType> args, DeclarationName declarationName, bool isConst,
|
||||
|
@ -800,7 +823,7 @@ CXXMethodDecl* hlsl::CreateObjectFunctionDeclarationWithParams(
|
|||
|
||||
CXXRecordDecl* hlsl::DeclareUIntTemplatedTypeWithHandle(
|
||||
ASTContext& context, StringRef typeName, StringRef templateParamName) {
|
||||
// template<uint kind> RayQuery/FeedbackTexture2D[Array] { ... }
|
||||
// template<uint kind> FeedbackTexture2D[Array] { ... }
|
||||
BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), typeName);
|
||||
typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy);
|
||||
typeDeclBuilder.startDefinition();
|
||||
|
@ -808,6 +831,24 @@ CXXRecordDecl* hlsl::DeclareUIntTemplatedTypeWithHandle(
|
|||
return typeDeclBuilder.completeDefinition();
|
||||
}
|
||||
|
||||
// Declares the built-in type
//   template<uint flags> RayQuery { uint h; RayQuery(); }
// in the translation unit and returns its completed record declaration.
CXXRecordDecl* hlsl::DeclareRayQueryType(ASTContext& context) {
  BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), "RayQuery");
  typeDeclBuilder.addIntegerTemplateParam("flags", context.UnsignedIntTy);
  typeDeclBuilder.startDefinition();

  // 'h' holds the handle.
  typeDeclBuilder.addField("h", context.UnsignedIntTy);

  // The constructor is lowered later to the intrinsic that produces the
  // RayQuery handle for this object.
  CanQualType recordTy =
      typeDeclBuilder.getRecordDecl()->getTypeForDecl()->getCanonicalTypeUnqualified();
  DeclarationName ctorName = context.DeclarationNames.getCXXConstructorName(recordTy);
  CXXConstructorDecl *pCtor = nullptr;
  TypeSourceInfo *pCtorTypeInfo = nullptr;
  CreateConstructorDeclaration(context, typeDeclBuilder.getRecordDecl(),
                               context.VoidTy, {}, ctorName, false,
                               &pCtor, &pCtorTypeInfo);
  typeDeclBuilder.getRecordDecl()->addDecl(pCtor);

  return typeDeclBuilder.completeDefinition();
}
|
||||
|
||||
bool hlsl::IsIntrinsicOp(const clang::FunctionDecl *FD) {
|
||||
return FD != nullptr && FD->hasAttr<HLSLIntrinsicAttr>();
|
||||
}
|
||||
|
|
|
@ -583,6 +583,20 @@ bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKi
|
|||
return false;
|
||||
}
|
||||
|
||||
// Returns true when the canonical form of `type` is a record type whose
// declaration is a class template specialization named "RayQuery".
bool IsHLSLRayQueryType(clang::QualType type) {
  type = type.getCanonicalType();
  const RecordType *RT = dyn_cast<RecordType>(type);
  if (!RT)
    return false;

  // getAsCXXRecordDecl() may return null, and dyn_cast<> must not be given a
  // null pointer, so use the null-tolerant form.
  const ClassTemplateSpecializationDecl *templateDecl =
      dyn_cast_or_null<ClassTemplateSpecializationDecl>(
          RT->getAsCXXRecordDecl());
  if (!templateDecl)
    return false;

  return templateDecl->getName() == "RayQuery";
}
|
||||
|
||||
QualType GetHLSLResourceResultType(QualType type) {
|
||||
// Don't canonicalize the type as to not lose snorm in Buffer<snorm float>
|
||||
const RecordType *RT = type->getAs<RecordType>();
|
||||
|
|
|
@ -1725,17 +1725,29 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
|
|||
EmitBlock(loopBB);
|
||||
}
|
||||
|
||||
// Find the end of the array.
|
||||
llvm::Value *arrayEnd = Builder.CreateInBoundsGEP(arrayBegin, numElements,
|
||||
"arrayctor.end");
|
||||
// HLSL Change Begin: Loop on index instead of ptr
|
||||
//// Find the end of the array.
|
||||
//llvm::Value *arrayEnd = Builder.CreateInBoundsGEP(arrayBegin, numElements,
|
||||
// "arrayctor.end");
|
||||
// HLSL Change End
|
||||
|
||||
// Enter the loop, setting up a phi for the current location to initialize.
|
||||
llvm::BasicBlock *entryBB = Builder.GetInsertBlock();
|
||||
llvm::BasicBlock *loopBB = createBasicBlock("arrayctor.loop");
|
||||
EmitBlock(loopBB);
|
||||
llvm::PHINode *cur = Builder.CreatePHI(arrayBegin->getType(), 2,
|
||||
"arrayctor.cur");
|
||||
cur->addIncoming(arrayBegin, entryBB);
|
||||
|
||||
// HLSL Change Begin: Loop on index instead of ptr
|
||||
//llvm::PHINode *cur = Builder.CreatePHI(arrayBegin->getType(), 2,
|
||||
// "arrayctor.cur");
|
||||
//cur->addIncoming(arrayBegin, entryBB);
|
||||
llvm::PHINode *idx = Builder.CreatePHI(numElements->getType(), 2,
|
||||
"arrayctor.idx");
|
||||
idx->addIncoming(
|
||||
llvm::ConstantInt::get(numElements->getType(), (uint64_t)0), entryBB);
|
||||
llvm::Value *next = Builder.CreateAdd(idx,
|
||||
llvm::ConstantInt::get(idx->getType(), (uint64_t)1), "arrayctor.next");
|
||||
llvm::Value *cur = Builder.CreateInBoundsGEP(arrayBegin, {idx}, "arrayctor.cur");
|
||||
// HLSL Change End
|
||||
|
||||
// Inside the loop body, emit the constructor call on the array element.
|
||||
|
||||
|
@ -1768,16 +1780,31 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
|
|||
/*Delegating=*/false, cur, E);
|
||||
}
|
||||
|
||||
// Go to the next element.
|
||||
llvm::Value *next =
|
||||
Builder.CreateInBoundsGEP(cur, llvm::ConstantInt::get(SizeTy, 1),
|
||||
"arrayctor.next");
|
||||
cur->addIncoming(next, Builder.GetInsertBlock());
|
||||
// HLSL Change Begin: Loop on index instead of ptr
|
||||
//// Go to the next element.
|
||||
//llvm::Value *next =
|
||||
// Builder.CreateInBoundsGEP(cur, llvm::ConstantInt::get(SizeTy, 1),
|
||||
// "arrayctor.next");
|
||||
//cur->addIncoming(next, Builder.GetInsertBlock());
|
||||
idx->addIncoming(next, Builder.GetInsertBlock());
|
||||
// HLSL Change End
|
||||
|
||||
// Check whether that's the end of the loop.
|
||||
llvm::Value *done = Builder.CreateICmpEQ(next, arrayEnd, "arrayctor.done");
|
||||
// HLSL Change Begin: Loop on index instead of ptr
|
||||
//llvm::Value *done = Builder.CreateICmpEQ(next, arrayEnd, "arrayctor.done");
|
||||
llvm::Value *done = Builder.CreateICmpEQ(next, numElements, "arrayctor.done");
|
||||
// HLSL Change End
|
||||
|
||||
llvm::BasicBlock *contBB = createBasicBlock("arrayctor.cont");
|
||||
Builder.CreateCondBr(done, contBB, loopBB);
|
||||
llvm::TerminatorInst *TI = cast<llvm::TerminatorInst>( // HLSL Change, capture terminator
|
||||
Builder.CreateCondBr(done, contBB, loopBB));
|
||||
|
||||
// HLSL Change Begin: force unroll
|
||||
LoopAttributes loopAttr;
|
||||
loopAttr.HlslUnrollPolicy = LoopAttributes::HlslForceUnroll;
|
||||
LoopInfo loopInfo(loopBB, loopAttr);
|
||||
TI->setMetadata("llvm.loop", loopInfo.getLoopID());
|
||||
// HLSL Change End
|
||||
|
||||
// Patch the earlier check to skip over the loop.
|
||||
if (zeroCheckBranch) zeroCheckBranch->setSuccessor(0, contBB);
|
||||
|
|
|
@ -5067,6 +5067,57 @@ static void CreateWriteEnabledStaticGlobals(llvm::Module *M,
|
|||
}
|
||||
}
|
||||
|
||||
// Translate calls to the RayQuery constructor into HL AllocateRayQuery
// intrinsic calls, then delete the constructor function itself. From:
|
||||
// %call = call %"RayQuery<flags>"* @<constructor>(%"RayQuery<flags>"* %ptr)
|
||||
// To:
|
||||
// %handle = call i32 @<AllocateRayQuery>(i32 <IntrinsicOp::IOP_AllocateRayQuery>, i32 <flags>)
|
||||
// %gep = getelementptr inbounds %"RayQuery<flags>"* %ptr, i32 0, i32 0
|
||||
// store i32 %handle, i32* %gep
|
||||
// ; and replace uses of %call with %ptr
|
||||
void TranslateRayQueryConstructor(llvm::Module &M) {
|
||||
// Collect matches first; the rewrite loop below erases functions from the
// module, so it only runs after this scan over M.functions() has finished.
SmallVector<Function*, 4> Constructors;
|
||||
for (auto &F : M.functions()) {
|
||||
// Match templated RayQuery constructor instantiation by prefix and signature.
|
||||
// It should be impossible to achieve the same signature from HLSL.
|
||||
if (!F.getName().startswith("\01??0?$RayQuery@$"))
|
||||
continue;
|
||||
// The return type must be a pointer to a RayQuery type...
llvm::Type *Ty = F.getReturnType();
|
||||
if (!Ty->isPointerTy() || !dxilutil::IsHLSLRayQueryType(Ty->getPointerElementType()))
|
||||
continue;
|
||||
// ...and the only argument (the 'this' pointer) must have that same type.
if (F.arg_size() != 1 || Ty != F.arg_begin()->getType())
|
||||
continue;
|
||||
Constructors.emplace_back(&F);
|
||||
}
|
||||
|
||||
for (auto pConstructorFunc : Constructors) {
|
||||
// Pieces of the replacement call: the intrinsic has signature i32(i32, i32)
// and receives the IOP_AllocateRayQuery opcode constant as its first argument.
llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext());
|
||||
llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false);
|
||||
llvm::FunctionType *funcTy = llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false);
|
||||
unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery;
|
||||
llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false);
|
||||
Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode);
|
||||
|
||||
// Rewrite one call per iteration. Erasing the call removes it from the
// constructor's user list, so the loop ends once no users remain.
while (!pConstructorFunc->user_empty()) {
|
||||
Value *V = *pConstructorFunc->user_begin();
|
||||
llvm::CallInst *CI = cast<CallInst>(V); // Must be call
|
||||
llvm::Value *pThis = CI->getArgOperand(0);
|
||||
// The ray flags are not a constructor argument; they are read from the
// integral template argument recorded in the struct annotation of this
// RayQuery specialization.
llvm::StructType *pRQType = cast<llvm::StructType>(pThis->getType()->getPointerElementType());
|
||||
DxilStructAnnotation *SA = M.GetHLModule().GetTypeSystem().GetStructAnnotation(pRQType);
|
||||
DXASSERT(SA, "otherwise, could not find type annoation for RayQuery specialization");
|
||||
DXASSERT(SA->GetNumTemplateArgs() == 1 && SA->GetTemplateArgAnnotation(0).IsIntegral(),
|
||||
"otherwise, RayQuery has changed, or lacks template args");
|
||||
// Insert the replacement instructions immediately before the constructor call.
llvm::IRBuilder<> Builder(CI);
|
||||
llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral());
|
||||
// The new call is given the same name as the 'this' pointer value.
llvm::Value *Call = Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName());
|
||||
// Store the returned i32 handle through a GEP with indices (0, 0) on 'this'.
llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero});
|
||||
Builder.CreateStore(Call, GEP);
|
||||
// Users of the constructor's result now use the 'this' pointer directly.
CI->replaceAllUsesWith(pThis);
|
||||
CI->eraseFromParent();
|
||||
}
|
||||
// No calls remain, so the constructor function itself is removed.
pConstructorFunc->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
void CGMSHLSLRuntime::FinishCodeGen() {
|
||||
// Libraries don't have an entry point.
|
||||
if (!m_bIsLib) {
|
||||
|
@ -5138,6 +5189,9 @@ void CGMSHLSLRuntime::FinishCodeGen() {
|
|||
// Create Global variable and type annotation for each CBuffer.
|
||||
ConstructCBuffer(m_pHLModule, CBufferType, m_ConstVarAnnotationMap);
|
||||
|
||||
// Translate calls to RayQuery constructor into hl Allocate calls
|
||||
TranslateRayQueryConstructor(*m_pHLModule->GetModule());
|
||||
|
||||
if (!m_bIsLib) {
|
||||
// need this for "llvm.global_dtors"?
|
||||
ProcessCtorFunctions(TheModule ,"llvm.global_ctors",
|
||||
|
|
|
@ -3317,7 +3317,7 @@ private:
|
|||
break;
|
||||
}
|
||||
} else if (kind == AR_OBJECT_RAY_QUERY) {
|
||||
recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "RayQuery", "flags");
|
||||
recordDecl = DeclareRayQueryType(*m_context);
|
||||
}
|
||||
else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) {
|
||||
recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "FeedbackTexture2D", "kind");
|
||||
|
@ -6853,14 +6853,27 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL(
|
|||
DXASSERT_NOMSG(initSequence != nullptr);
|
||||
|
||||
// In HLSL there are no default initializers, eg float4x4 m();
|
||||
if (Kind.getKind() == InitializationKind::IK_Default) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Value initializers occur for temporaries with empty parens or braces.
|
||||
if (Kind.getKind() == InitializationKind::IK_Value) {
|
||||
m_sema->Diag(Kind.getLocation(), diag::err_hlsl_type_empty_init) << Entity.getType();
|
||||
SilenceSequenceDiagnostics(initSequence);
|
||||
// Except for RayQuery constructor (also handle InitializationKind::IK_Value)
|
||||
if (Kind.getKind() == InitializationKind::IK_Default ||
|
||||
Kind.getKind() == InitializationKind::IK_Value) {
|
||||
QualType destBaseType = m_context->getBaseElementType(Entity.getType());
|
||||
ArTypeObjectKind destBaseShape = GetTypeObjectKind(destBaseType);
|
||||
if (destBaseShape == AR_TOBJ_OBJECT) {
|
||||
const CXXRecordDecl *typeRecordDecl = destBaseType->getAsCXXRecordDecl();
|
||||
int index = FindObjectBasicKindIndex(GetRecordDeclForBuiltInOrStruct(typeRecordDecl));
|
||||
DXASSERT(index != -1, "otherwise can't find type we already determined was an object");
|
||||
if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) {
|
||||
CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin();
|
||||
initSequence->AddConstructorInitializationStep(
|
||||
Constructor, AccessSpecifier::AS_public, destBaseType, false, false, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Value initializers occur for temporaries with empty parens or braces.
|
||||
if (Kind.getKind() == InitializationKind::IK_Value) {
|
||||
m_sema->Diag(Kind.getLocation(), diag::err_hlsl_type_empty_init) << Entity.getType();
|
||||
SilenceSequenceDiagnostics(initSequence);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -2,7 +2,7 @@
|
|||
|
||||
// CHECK: main
|
||||
// After lowering, these would turn into multiple abs calls rather than a 4 x float
|
||||
// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 94,
|
||||
// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 95,
|
||||
|
||||
float4 main(float4 a : A) : SV_TARGET {
|
||||
return abs(a*a.yxxx);
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
|
||||
|
||||
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
|
||||
RaytracingAccelerationStructure RTAS;
|
||||
|
||||
// Helper: calls TraceRayInline(RTAS, 0, 1, rayDesc) on the RayQuery it is
// given. The RayQuery parameter is taken by value.
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
|
||||
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
|
||||
}
|
||||
|
||||
int C;
|
||||
|
||||
// Entry point: declares a 2x3 array of RayQuery objects, traces through the
// statically indexed element [1][2] (via DoTrace and directly), then through
// the dynamically indexed element [0][C] via DoTrace.
float main(RayDesc rayDesc : RAYDESC) : OUT {
|
||||
// CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
|
||||
// Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
|
||||
// But that would require 2d array optimization when one index is constant.
|
||||
// CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ01:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ02:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ10:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ11:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ12:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery[2][3];
|
||||
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ12]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
DoTrace(rayQuery[1][2], rayDesc);
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ12]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
|
||||
rayQuery[1][2].TraceRayInline(RTAS, 1, 2, rayDesc);
|
||||
|
||||
// CHECK: %[[GEP:[^ ]+]] = getelementptr [6 x i32], [6 x i32]* %[[array]],
|
||||
// CHECK: %[[load:[^ ]+]] = load i32, i32* %[[GEP]]
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[load]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
DoTrace(rayQuery[0][C], rayDesc);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
|
||||
|
||||
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
|
||||
// CHECK: %[[RQ2:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ1:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
|
||||
// Additional allocations should have been cleaned up
|
||||
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178,
|
||||
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ2]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
|
||||
RaytracingAccelerationStructure RTAS;
|
||||
|
||||
// Helper: calls TraceRayInline(RTAS, 0, 1, rayDesc) on the RayQuery it is
// given. The RayQuery parameter is taken by value.
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
|
||||
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
|
||||
}
|
||||
|
||||
int C;
|
||||
|
||||
// Entry point: declares a 2-element RayQuery array, traces through element
// [1] (via DoTrace and directly), then through element [0] via DoTrace.
// Both indices are constants.
float main(RayDesc rayDesc : RAYDESC) : OUT {
|
||||
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery[2];
|
||||
DoTrace(rayQuery[1], rayDesc);
|
||||
rayQuery[1].TraceRayInline(RTAS, 1, 2, rayDesc);
|
||||
DoTrace(rayQuery[0], rayDesc);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
|
||||
|
||||
RaytracingAccelerationStructure RTAS;
|
||||
|
||||
// We should eliminate these calls somehow in the future, but for now, that does not look like a legal optimization.
|
||||
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
static RayQuery<0> g_rayQueryArray[4];
|
||||
|
||||
// g_rayQueryUnused should be optimized away
|
||||
static RayQuery<0> g_rayQueryUnused;
|
||||
|
||||
// Entry point: exercises local RayQuery objects together with the static
// global array g_rayQueryArray. Locals are stored into and read back from
// the array at dynamic index i, and a second RayQuery with different flags
// is initialized from a temporary.
void main(uint i : IDX, RayDesc rayDesc : RAYDESC) {
|
||||
// CHECK: %[[rayQuery0a:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
RayQuery<0> rayQuery0a;
|
||||
|
||||
// rayQuery0b should be completely optimized away
|
||||
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
RayQuery<0> rayQuery0b;
|
||||
g_rayQueryArray[i] = rayQuery0b; // Stored here, then overwritten with rayQuery0a
|
||||
g_rayQueryArray[i] = rayQuery0a;
|
||||
|
||||
// No separate allocation, just a handle copy
|
||||
// optimizations should have eliminated load from global array
|
||||
// CHECK-NOT: load
|
||||
RayQuery<0> rayQuery0c = g_rayQueryArray[i];
|
||||
|
||||
// rayQuery0a is the one actually used here
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[rayQuery0a]],
|
||||
rayQuery0c.TraceRayInline(RTAS, 1, 2, rayDesc);
|
||||
|
||||
// AllocateRayQuery occurs here, rather than next to allocas
|
||||
// There should not be an extra allocation, since the above should allocate and copy
|
||||
// CHECK: %[[rayQuery1c:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 1)
|
||||
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
|
||||
RayQuery<RAY_FLAG_FORCE_OPAQUE> rayQuery1c = RayQuery<RAY_FLAG_FORCE_OPAQUE>();
|
||||
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[rayQuery1c]],
|
||||
rayQuery1c.TraceRayInline(RTAS, 3, 4, rayDesc);
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
|
||||
|
||||
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
|
||||
// CHECK: %[[RQ1:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: %[[RQ2:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
|
||||
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ2]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
|
||||
|
||||
RaytracingAccelerationStructure RTAS;
|
||||
|
||||
// Helper: calls TraceRayInline(RTAS, 0, 1, rayDesc) on the RayQuery it is
// given. The RayQuery parameter is taken by value.
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
|
||||
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
|
||||
}
|
||||
|
||||
// Entry point: declares two separate RayQuery locals, traces through
// rayQuery1 (via DoTrace and directly), then through rayQuery2 via DoTrace.
float main(RayDesc rayDesc : RAYDESC) : OUT {
|
||||
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery1;
|
||||
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery2;
|
||||
DoTrace(rayQuery1, rayDesc);
|
||||
rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc);
|
||||
DoTrace(rayQuery2, rayDesc);
|
||||
return 0;
|
||||
}
|
|
@ -321,6 +321,9 @@ void [[]] SetMeshOutputCounts(in uint numVertices, in uint numPrimitives);
|
|||
// Amplification shader intrinsics:
|
||||
void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in uint threadGroupCountZ, in udt meshPayload);
|
||||
|
||||
// HL Op for allocating ray query object that default constructor uses
|
||||
uint [[hidden]] AllocateRayQuery(in uint flags);
|
||||
|
||||
} namespace
|
||||
|
||||
namespace StreamMethods {
|
||||
|
|
|
@ -1506,7 +1506,7 @@ class db_dxil(object):
|
|||
next_op_idx += 1
|
||||
|
||||
# RayQuery
|
||||
self.add_dxil_op("AllocateRayQuery", next_op_idx, "AllocateRayQuery", "allocates space for RayQuery and return handle", "v", "rn", [
|
||||
self.add_dxil_op("AllocateRayQuery", next_op_idx, "AllocateRayQuery", "allocates space for RayQuery and return handle", "v", "", [
|
||||
db_dxil_param(0, "i32", "", "handle to RayQuery state"),
|
||||
db_dxil_param(2, "u32", "constRayFlags", "Valid combination of RAY_FLAGS", is_const=True)])
|
||||
next_op_idx += 1
|
||||
|
@ -2572,7 +2572,7 @@ class db_hlsl_attribute(object):
|
|||
|
||||
class db_hlsl_intrinsic(object):
|
||||
"An HLSL intrinsic declaration"
|
||||
def __init__(self, name, idx, opname, params, ns, ns_idx, doc, ro, rn, unsigned_op, overload_idx):
|
||||
def __init__(self, name, idx, opname, params, ns, ns_idx, doc, ro, rn, unsigned_op, overload_idx, hidden):
|
||||
self.name = name # Function name
|
||||
self.idx = idx # Unique number within namespace
|
||||
self.opname = opname # D3D-style name
|
||||
|
@ -2588,6 +2588,7 @@ class db_hlsl_intrinsic(object):
|
|||
if unsigned_op != "":
|
||||
self.unsigned_op = "%s_%s" % (id_prefix, unsigned_op)
|
||||
self.overload_param_index = overload_idx # Parameter determines the overload type, -1 means ret type
|
||||
self.hidden = hidden # Internal high-level op, not exposed to HLSL
|
||||
self.key = ("%3d" % ns_idx) + "!" + name + "!" + ("%2d" % len(params)) + "!" + ("%3d" % idx) # Unique key
|
||||
self.vulkanSpecific = ns.startswith("Vk") # Vulkan specific intrinsic - SPIRV change
|
||||
|
||||
|
@ -2824,6 +2825,7 @@ class db_hlsl(object):
|
|||
readnone = False # Not read memory
|
||||
unsigned_op = "" # Unsigned opcode if exist
|
||||
overload_param_index = -1 # Parameter determines the overload type, -1 means ret type.
|
||||
hidden = False
|
||||
for a in attrs:
|
||||
if (a == ""):
|
||||
continue
|
||||
|
@ -2833,6 +2835,10 @@ class db_hlsl(object):
|
|||
if (a == "rn"):
|
||||
readnone = True
|
||||
continue
|
||||
if (a == "hidden"):
|
||||
hidden = True
|
||||
continue
|
||||
|
||||
assign = a.split('=')
|
||||
|
||||
if (len(assign) != 2):
|
||||
|
@ -2848,7 +2854,7 @@ class db_hlsl(object):
|
|||
continue
|
||||
assert False, "invalid attr %s" % (a)
|
||||
|
||||
return readonly, readnone, unsigned_op, overload_param_index
|
||||
return readonly, readnone, unsigned_op, overload_param_index, hidden
|
||||
|
||||
current_namespace = None
|
||||
for line in intrinsic_defs:
|
||||
|
@ -2881,7 +2887,7 @@ class db_hlsl(object):
|
|||
op = operand_match.group(1)
|
||||
if not op:
|
||||
op = name
|
||||
readonly, readnone, unsigned_op, overload_param_index = process_attr(attr)
|
||||
readonly, readnone, unsigned_op, overload_param_index, hidden = process_attr(attr)
|
||||
# Add an entry for this intrinsic.
|
||||
if bracket_cleanup_re.search(opts):
|
||||
opts = bracket_cleanup_re.sub(r"<\1@\2>", opts)
|
||||
|
@ -2905,7 +2911,7 @@ class db_hlsl(object):
|
|||
# TODO: verify a single level of indirection
|
||||
self.intrinsics.append(db_hlsl_intrinsic(
|
||||
name, num_entries, op, args, current_namespace, ns_idx, "pending doc for " + name,
|
||||
readonly, readnone, unsigned_op, overload_param_index))
|
||||
readonly, readnone, unsigned_op, overload_param_index, hidden))
|
||||
num_entries += 1
|
||||
continue
|
||||
assert False, "cannot parse line %s" % (line)
|
||||
|
|
|
@ -693,8 +693,13 @@ def get_hlsl_intrinsics():
|
|||
ns_table += " {(UINT)%s::%s_%s, %s, %s, %d, %d, g_%s_Args%s},\n" % (opcode_namespace, id_prefix, i.name, str(i.readonly).lower(), str(i.readnone).lower(), i.overload_param_index,len(i.params), last_ns, arg_idx)
|
||||
result += "static const HLSL_INTRINSIC_ARGUMENT g_%s_Args%s[] =\n{\n" % (last_ns, arg_idx)
|
||||
for p in i.params:
|
||||
name = p.name
|
||||
if name == i.name and i.hidden:
|
||||
# First parameter defines intrinsic name for parsing in HLSL.
|
||||
# Prepend '$hidden$' for hidden intrinsic so it can't be used in HLSL.
|
||||
name = "$hidden$" + name
|
||||
result += " {\"%s\", %s, %s, %s, %s, %s, %s, %s},\n" % (
|
||||
p.name, p.param_qual, p.template_id, p.template_list,
|
||||
name, p.param_qual, p.template_id, p.template_list,
|
||||
p.component_id, p.component_list, p.rows, p.cols)
|
||||
result += "};\n\n"
|
||||
arg_idx += 1
|
||||
|
|
Загрузка…
Ссылка в новой задаче