Fix RayQuery allocation for CSE, DCE, statics, arrays, and lifetimes (#2469)

Fixes problems like:
- extra AllocateRayQuery calls, or calls placed at an improper location (for lifetime)
- lack of proper array support
- static global RayQuery

This RayQuery allocation changes:
- Add a constructor to RayQuery
- Set init sequence to use constructor in InitializeInitSequenceForHLSL, just for RayQuery
- For array: modify EmitCXXAggrConstructorCall to
  - loop over index instead of pointer to allow SROA of RayQuery struct
  - mark the loop as HlslForceUnroll
- Add a hidden flag for HL intrinsics to allow internal intrinsics that are not
  produced by HLSL directly; the name is mangled so it can't be matched during parsing.
- Add hidden HL AllocateRayQuery intrinsic
- Translate constructor call on ptr to HL AllocateRayQuery intrinsic call producing handle i32 during FinishCodeGen
- Translate RayQuery ptr to load i32 handle value for intrinsic methods during SROA_HLSL
- Flatten RayDesc for TraceRayInline
  (otherwise /Od fails validation since RayDesc type may still be present)
- No longer skip RayQuery for SROA_HLSL
- Update lowering for AllocateRayQuery, i32 handle, and flattened RayDesc
- Remove ReadNone attribute from AllocateRayQuery to prevent incorrect CSE optimizations
- Manually cleanup unused RayQuery allocations
This commit is contained in:
Tex Riddell 2019-09-27 12:50:43 -07:00 коммит произвёл GitHub
Родитель 9219b1fd9f
Коммит 2a01c58f73
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
21 изменённых файлов: 892 добавлений и 589 удалений

Просмотреть файл

@ -349,6 +349,9 @@ const unsigned kCreateHandleIndexOpIdx = 2; // Only for array of cbuffer.
const unsigned kTraceRayRayDescOpIdx = 7;
const unsigned kTraceRayPayLoadOpIdx = 8;
// CallShader.
const unsigned kCallShaderPayloadOpIdx = 2;
// TraceRayInline.
const unsigned kTraceRayInlineRayDescOpIdx = 5;

Просмотреть файл

@ -25,6 +25,7 @@ import hctdb_instrhelp
IOP_AddUint64,
IOP_AllMemoryBarrier,
IOP_AllMemoryBarrierWithGroupSync,
IOP_AllocateRayQuery,
IOP_CallShader,
IOP_CheckAccessFullyMapped,
IOP_D3DCOLORtoUBYTE4,

Просмотреть файл

@ -342,7 +342,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
{ OC::WriteSamplerFeedbackGrad, "WriteSamplerFeedbackGrad", OCC::WriteSamplerFeedbackGrad, "writeSamplerFeedbackGrad", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
// Inline Ray Query void, h, f, d, i1, i8, i16, i32, i64, udt, obj , function attribute
{ OC::AllocateRayQuery, "AllocateRayQuery", OCC::AllocateRayQuery, "allocateRayQuery", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::ReadNone, },
{ OC::AllocateRayQuery, "AllocateRayQuery", OCC::AllocateRayQuery, "allocateRayQuery", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
{ OC::RayQuery_TraceRayInline, "RayQuery_TraceRayInline", OCC::RayQuery_TraceRayInline, "rayQuery_TraceRayInline", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },
{ OC::RayQuery_Proceed, "RayQuery_Proceed", OCC::RayQuery_Proceed, "rayQuery_Proceed", { false, false, false, false, true, false, false, false, false, false, false}, Attribute::None, },
{ OC::RayQuery_Abort, "RayQuery_Abort", OCC::RayQuery_Abort, "rayQuery_Abort", { true, false, false, false, false, false, false, false, false, false, false}, Attribute::None, },

Просмотреть файл

@ -407,6 +407,9 @@ public:
// Clear intermediate options that shouldn't be in the final DXIL
DM.ClearIntermediateOptions();
// Remove unused AllocateRayQuery calls
RemoveUnusedRayQuery(M);
if (IsLib && DXIL::CompareVersions(ValMajor, ValMinor, 1, 4) <= 0) {
// 1.4 validator requires function annotations for all functions
AddFunctionAnnotationForInitializers(M, DM);
@ -577,6 +580,26 @@ private:
}
}
}
// Erases every call to the DXIL AllocateRayQuery op whose result has no users,
// then erases the op function itself once nothing references it any more.
void RemoveUnusedRayQuery(Module &M) {
  hlsl::OP *pOP = M.GetDxilModule().GetOP();
  llvm::Function *pAllocFunc = pOP->GetOpFunc(
      DXIL::OpCode::AllocateRayQuery, Type::getVoidTy(M.getContext()));

  // Collect first and erase afterwards, so the user list is not modified
  // while it is being walked.
  SmallVector<CallInst*, 4> unusedCalls;
  for (auto *pUser : pAllocFunc->users()) {
    CallInst *pCall = dyn_cast<CallInst>(pUser);
    if (pCall && pCall->user_empty())
      unusedCalls.emplace_back(pCall);
  }

  for (CallInst *pCall : unusedCalls)
    pCall->eraseFromParent();

  // Drop the op function as well when no user is left.
  if (pAllocFunc->user_empty())
    pAllocFunc->eraseFromParent();
}
};
}

Просмотреть файл

@ -4800,56 +4800,16 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
return Builder.CreateCall(F, Args);
}
// Inserts a DXIL AllocateRayQuery call after each RayQuery-typed alloca found
// in the entry block of every defined, non-intrinsic, non-HL function of M,
// and stores the returned handle into the first field of the alloca.
//   M      - module whose functions are scanned.
//   helper - supplies the DXIL op table (hlslOP), the type system used to look
//            up the RayQuery template argument, and the i32 type.
void AllocateRayQueryObjects(llvm::Module *M, HLOperationLowerHelper &helper) {
// Iterate functions and insert AllocateRayQuery intrinsic to initialize
// handle value for every alloca of ray query type
hlsl::OP &hlslOP = helper.hlslOP;
Constant *i32Zero = hlslOP.GetI32Const(0);
DXIL::OpCode opcode = DXIL::OpCode::AllocateRayQuery;
llvm::Value *opcodeVal = hlslOP.GetU32Const(static_cast<unsigned>(opcode));
for (Function &f : M->functions()) {
// Skip declarations, LLVM intrinsics, and functions that belong to an HL
// opcode group.
if (f.isDeclaration() || f.isIntrinsic() ||
GetHLOpcodeGroup(&f) != HLOpcodeGroup::NotHL)
continue;
// Iterate allocas (only the entry block is scanned)
BasicBlock &BB = f.getEntryBlock();
IRBuilder<> Builder(dxilutil::FirstNonAllocaInsertionPt(&BB));
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
// Avoid invalidating the iterator.
Instruction *I = BI++;
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
llvm::Type *allocaTy = AI->getAllocatedType();
// Peel off array dimensions to reach the innermost element type.
llvm::Type *elementTy = allocaTy;
while (elementTy->isArrayTy())
elementTy = elementTy->getArrayElementType();
if (dxilutil::IsHLSLRayQueryType(elementTy)) {
// The struct annotation carries the RayQuery template argument; exactly one
// integral argument is expected.
DxilStructAnnotation *SA = helper.dxilTypeSys.GetStructAnnotation(cast<StructType>(elementTy));
DXASSERT(SA, "otherwise, could not find type annoation for RayQuery specialization");
DXASSERT(SA->GetNumTemplateArgs() == 1 && SA->GetTemplateArgAnnotation(0).IsIntegral(),
"otherwise, RayQuery has changed, or lacks template args");
// Emit the allocation directly after the alloca it initializes.
Builder.SetInsertPoint(AI->getNextNode());
// Arrays of RayQuery are detected above but are not supported here.
DXASSERT(!allocaTy->isArrayTy(), "Array not handled yet");
llvm::Function *AllocFn = hlslOP.GetOpFunc(DXIL::OpCode::AllocateRayQuery, Builder.getVoidTy());
// The integral template argument is passed as the 32-bit flags operand.
llvm::Value *rayFlags = ConstantInt::get(helper.i32Ty,
APInt(32, SA->GetTemplateArgAnnotation(0).GetIntegral()));
llvm::CallInst *CI = Builder.CreateCall(AllocFn, {opcodeVal, rayFlags}, "hRayQuery");
// Store the handle into field 0 of the RayQuery struct.
llvm::Value *GEP = Builder.CreateGEP(AI, {i32Zero, i32Zero});
Builder.CreateStore(CI, GEP);
}
}
}
}
}
// RayQuery methods
// Loads the handle value stored in field 0 of the object pointed to by
// argument operand 1 of CI. The GEP and load are inserted right before CI.
static Value* TranslateThisPointerToi32Handle(CallInst*CI, hlsl::OP *hlslOP)
{
  Constant *zero = hlslOP->GetI32Const(0);
  IRBuilder<> Builder(CI);
  // Address of the first struct member: indices {0, 0}.
  Value *handlePtr = Builder.CreateGEP(CI->getArgOperand(1), {zero, zero});
  return Builder.CreateLoad(handlePtr);
}
// Lowers the HL AllocateRayQuery intrinsic call CI by forwarding its operand 1
// to TrivialDxilOperation for the given DXIL opcode.
Value *TranslateAllocateRayQuery(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
                                 HLOperationLowerHelper &helper,
                                 HLObjectOperationLowerHelper *pObjHelper,
                                 bool &Translated) {
  Value *flagsOperand = CI->getOperand(1);
  // Slot 0 is left null here; presumably TrivialDxilOperation fills in the
  // opcode operand -- confirm against its definition.
  Value *dxilArgs[] = {nullptr, flagsOperand};
  return TrivialDxilOperation(opcode, dxilArgs, helper.voidTy, CI,
                              &helper.hlslOP);
}
Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
HLOperationLowerHelper &helper,
@ -4857,49 +4817,35 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx);
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp];
Args[0] = opArg;
// Translate this pointer to i32 handle value
Args[1] = TranslateThisPointerToi32Handle(CI, hlslOP);
for (unsigned i = 2; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) {
Args[i] = CI->getArgOperand(i);
}
IRBuilder<> Builder(CI);
unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx;
unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
// struct RayDesc
//{
// float3 Origin;
// float TMin;
// float3 Direction;
// float TMax;
//};
IRBuilder<> Builder(CI);
Value *zeroIdx = hlslOP->GetU32Const(0);
Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx});
origin = Builder.CreateLoad(origin);
unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx;
Value *origin = CI->getArgOperand(hlIndex++);
Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0);
Args[index++] = Builder.CreateExtractElement(origin, 1);
Args[index++] = Builder.CreateExtractElement(origin, 2);
Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)});
tmin = Builder.CreateLoad(tmin);
Args[index++] = tmin;
Value *direction = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)});
direction = Builder.CreateLoad(direction);
// float TMin;
Args[index++] = CI->getArgOperand(hlIndex++);
// float3 Direction;
Value *direction = CI->getArgOperand(hlIndex++);
Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0);
Args[index++] = Builder.CreateExtractElement(direction, 1);
Args[index++] = Builder.CreateExtractElement(direction, 2);
Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)});
tmax = Builder.CreateLoad(tmax);
Args[index++] = tmax;
// float TMax;
Args[index++] = CI->getArgOperand(hlIndex++);
//};
DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp);
@ -4915,8 +4861,9 @@ Value *TranslateCommitProceduralPrimitiveHit(CallInst *CI, IntrinsicOp IOP, OP::
hlsl::OP *hlslOP = &helper.hlslOP;
Value *THit = CI->getArgOperand(HLOperandIndex::kBinaryOpSrc1Idx);
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
Value *Args[] = {opArg,TranslateThisPointerToi32Handle(CI, hlslOP),THit};
Value *Args[] = {opArg, handle, THit};
IRBuilder<> Builder(CI);
Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy());
@ -4931,13 +4878,14 @@ Value *TranslateGenericRayQueryMethod(CallInst *CI, IntrinsicOp IOP, OP::OpCode
hlsl::OP *hlslOP = &helper.hlslOP;
Value *opArg = hlslOP->GetU32Const(static_cast<unsigned>(opcode));
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
Value *Args[] = {opArg,TranslateThisPointerToi32Handle(CI, hlslOP)};
Value *Args[] = {opArg, handle};
IRBuilder<> Builder(CI);
Function *F = hlslOP->GetOpFunc(opcode, CI->getType());
return Builder.CreateCall(F, Args);
return Builder.CreateCall(F, {opArg, handle});
}
Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
@ -4946,7 +4894,7 @@ Value *TranslateRayQueryMatrix3x4Operation(CallInst *CI, IntrinsicOp IOP, OP::Op
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
VectorType *Ty = cast<VectorType>(CI->getType());
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2};
Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
@ -4962,7 +4910,7 @@ Value *TranslateRayQueryTransposedMatrix3x4Operation(CallInst *CI, IntrinsicOp I
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
VectorType *Ty = cast<VectorType>(CI->getType());
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
uint32_t rVals[] = { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 };
Constant *rows = ConstantDataVector::get(CI->getContext(), rVals);
uint8_t cVals[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 };
@ -4978,7 +4926,7 @@ Value *TranslateRayQueryFloat2Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode o
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
VectorType *Ty = cast<VectorType>(CI->getType());
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
uint8_t elementVals[] = {0, 1};
Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
Value *retVal =
@ -4992,7 +4940,7 @@ Value *TranslateRayQueryFloat3Getter(CallInst *CI, IntrinsicOp IOP, OP::OpCode o
bool &Translated) {
hlsl::OP *hlslOP = &helper.hlslOP;
VectorType *Ty = cast<VectorType>(CI->getType());
Value* handle = TranslateThisPointerToi32Handle(CI, hlslOP);
Value *handle = CI->getArgOperand(HLOperandIndex::kHandleOpIdx);
uint8_t elementVals[] = {0, 1, 2};
Constant *element = ConstantDataVector::get(CI->getContext(), elementVals);
Value *retVal =
@ -5155,6 +5103,7 @@ IntrinsicLower gLowerTable[] = {
{IntrinsicOp::IOP_AddUint64, TranslateAddUint64, DXIL::OpCode::UAddc},
{IntrinsicOp::IOP_AllMemoryBarrier, TrivialBarrier, DXIL::OpCode::Barrier},
{IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync, TrivialBarrier, DXIL::OpCode::Barrier},
{IntrinsicOp::IOP_AllocateRayQuery, TranslateAllocateRayQuery, DXIL::OpCode::AllocateRayQuery},
{IntrinsicOp::IOP_CallShader, TranslateCallShader, DXIL::OpCode::CallShader},
{IntrinsicOp::IOP_CheckAccessFullyMapped, TranslateCheckAccess, DXIL::OpCode::CheckAccessFullyMapped},
{IntrinsicOp::IOP_D3DCOLORtoUBYTE4, TranslateD3DColorToUByte4, DXIL::OpCode::NumOpCodes},
@ -7739,8 +7688,6 @@ void TranslateBuiltinOperations(
Module *M = HLM.GetModule();
AllocateRayQueryObjects(M, helper);
SmallVector<Function *, 4> NonUniformResourceIndexIntrinsics;
// generate dxil operation

Просмотреть файл

@ -2507,10 +2507,53 @@ void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn,
}
}
// Flatten matching OldVal arg to NewElts, optionally loading values (loadElts).
// Does not replace or clean up old CallInst.
// Builds, right before CI, a new call to the HL intrinsic with the same group
// and opcode as CI, in which every argument equal to OldVal is expanded into
// the values of NewElts. When loadElts is set, pointer-typed elements are
// loaded first and the loaded value is passed instead of the pointer.
// The original CI is left untouched; the caller replaces/cleans it up.
static CallInst *CreateFlattenedHLIntrinsicCall(
    CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
  Function *callee = CI->getCalledFunction();
  HLOpcodeGroup group = GetHLOpcodeGroupByName(callee);
  DXASSERT_NOMSG(group == HLOpcodeGroup::HLIntrinsic);
  unsigned opcode = GetHLOpcode(CI);
  IRBuilder<> Builder(CI);

  // Build the flattened argument list and its parameter types in one pass.
  SmallVector<Value *, 4> newArgs;
  SmallVector<Type *, 4> newParamTys;
  for (Value *arg : CI->arg_operands()) {
    if (arg != OldVal) {
      newArgs.emplace_back(arg);
      newParamTys.emplace_back(arg->getType());
      continue;
    }
    for (Value *elt : NewElts) {
      Value *flatVal = elt;
      if (loadElts && flatVal->getType()->isPointerTy())
        flatVal = Builder.CreateLoad(flatVal);
      newArgs.emplace_back(flatVal);
      newParamTys.emplace_back(flatVal->getType());
    }
  }

  // Same return type as the original call; only the parameters change.
  FunctionType *newFuncTy =
      FunctionType::get(CI->getType(), newParamTys, false);
  Function *newCallee =
      GetOrCreateHLFunction(*callee->getParent(), newFuncTy, group, opcode);
  return Builder.CreateCall(newCallee, newArgs);
}
// Creates the flattened replacement for CI (see CreateFlattenedHLIntrinsicCall),
// redirects all uses of CI to it, and neutralizes CI by setting every one of
// its operands to undef. CI itself is not erased here.
static CallInst *RewriteWithFlattenedHLIntrinsicCall(
    CallInst *CI, Value* OldVal, ArrayRef<Value*> NewElts, bool loadElts) {
  CallInst *replacement =
      CreateFlattenedHLIntrinsicCall(CI, OldVal, NewElts, loadElts);
  CI->replaceAllUsesWith(replacement);
  // Clear CI operands so we don't try to translate old call again
  for (unsigned i = 0, e = CI->getNumOperands(); i != e; ++i)
    CI->setOperand(i, UndefValue::get(CI->getOperand(i)->getType()));
  return replacement;
}
/// RewriteCall - Replace OldVal with flattened NewElts in CallInst.
void SROA_Helper::RewriteCall(CallInst *CI) {
HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
Function *F = CI->getCalledFunction();
if (group != HLOpcodeGroup::NotHL) {
unsigned opcode = GetHLOpcode(CI);
if (group == HLOpcodeGroup::HLIntrinsic) {
@ -2519,36 +2562,9 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
case IntrinsicOp::MOP_Append: {
// Buffer Append already expand in code gen.
// Must be OutputStream Append here.
SmallVector<Value *, 4> flatArgs;
for (Value *arg : CI->arg_operands()) {
if (arg == OldVal) {
// Flatten to arg.
// Every Elt has a pointer type.
// For Append, it's not a problem.
for (Value *Elt : NewElts)
flatArgs.emplace_back(Elt);
} else
flatArgs.emplace_back(arg);
}
SmallVector<Type *, 4> flatParamTys;
for (Value *arg : flatArgs)
flatParamTys.emplace_back(arg->getType());
// Don't need flat return type for Append.
FunctionType *flatFuncTy =
FunctionType::get(CI->getType(), flatParamTys, false);
Function *flatF =
GetOrCreateHLFunction(*F->getParent(), flatFuncTy, group, opcode);
IRBuilder<> Builder(CI);
Builder.CreateCall(flatF, flatArgs);
// Append returns void, so it's not used by other instructions
// and we don't need to replace it with flatCI.
// However, we don't want to visit the same append again
// when SROA'ing other arguments, as that would be O(n^2)
// and we would attempt double-deleting the original call.
for (auto& opit : CI->operands())
opit.set(UndefValue::get(opit->getType()));
// Every Elt has a pointer type.
// For Append, this is desired, so don't load.
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/false);
DeadInsts.push_back(CI);
} break;
case IntrinsicOp::IOP_TraceRay: {
@ -2569,17 +2585,32 @@ void SROA_Helper::RewriteCall(CallInst *CI) {
/*bIn*/ true, /*bOut*/ false);
} break;
case IntrinsicOp::IOP_CallShader: {
RewriteCallArg(CI, HLOperandIndex::kBinaryOpSrc1Idx,
RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx,
/*bIn*/ true, /*bOut*/ true);
} break;
case IntrinsicOp::MOP_TraceRayInline: {
if (OldVal ==
CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) {
RewriteCallArg(CI, HLOperandIndex::kTraceRayInlineRayDescOpIdx,
/*bIn*/ true, /*bOut*/ false);
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
DeadInsts.push_back(CI);
break;
}
} break;
}
__fallthrough;
default:
// RayQuery this pointer replacement.
if (OldVal->getType()->isPointerTy() &&
CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx &&
OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) &&
dxilutil::IsHLSLRayQueryType(
OldVal->getType()->getPointerElementType())) {
// For RayQuery methods, we want to replace the RayQuery this pointer
// with a load and use of the underlying handle value.
// This will allow elimination of RayQuery types earlier.
RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/true);
DeadInsts.push_back(CI);
break;
}
DXASSERT(0, "cannot flatten hlsl intrinsic.");
}
}
@ -2722,7 +2753,7 @@ bool SROA_Helper::DoScalarReplacement(Value *V, std::vector<Value *> &Elts,
if (StructType *ST = dyn_cast<StructType>(Ty)) {
// Skip HLSL object types and RayQuery.
if (dxilutil::IsHLSLObjectType(ST) || dxilutil::IsHLSLRayQueryType(ST)) {
if (dxilutil::IsHLSLObjectType(ST)) {
return false;
}
@ -4647,8 +4678,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(Builder.GetInsertPoint()));
// Lower resource type to handle ty.
if (dxilutil::IsHLSLObjectType(Ty) &&
!HLModule::IsStreamOutputPtrType(V->getType())) {
if (dxilutil::IsHLSLResourceType(Ty)) {
Value *Res = V;
if (!bOut) {
Value *LdRes = Builder.CreateLoad(Res);
@ -4668,7 +4698,7 @@ Value *SROA_Parameter_HLSL::castResourceArgIfRequired(
arraySize *= AT->getArrayNumElements();
AT = AT->getArrayElementType();
}
if (dxilutil::IsHLSLObjectType(AT)) {
if (dxilutil::IsHLSLResourceType(AT)) {
Value *Res = V;
Type *Ty = ArrayType::get(HandleTy, arraySize);
V = AllocaBuilder.CreateAlloca(Ty);

Просмотреть файл

@ -322,6 +322,7 @@ clang::CXXRecordDecl* DeclareTemplateTypeWithHandle(
clang::CXXRecordDecl* DeclareUIntTemplatedTypeWithHandle(
clang::ASTContext& context, llvm::StringRef typeName, llvm::StringRef templateParamName);
clang::CXXRecordDecl* DeclareRayQueryType(clang::ASTContext& context);
/// <summary>Create a function template declaration for the specified method.</summary>
/// <param name="context">AST context in which to work.</param>
@ -382,6 +383,7 @@ unsigned GetHLSLOutputPatchCount(clang::QualType type);
bool IsHLSLSubobjectType(clang::QualType type);
bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKind,
DXIL::HitGroupType &ghType);
bool IsHLSLRayQueryType(clang::QualType type);
bool IsArrayConstantStringType(const clang::QualType type);
bool IsPointerStringType(const clang::QualType type);

Просмотреть файл

@ -39,6 +39,7 @@ static const bool DelayTypeCreationTrue = true; // delay type creation for a d
static const SourceLocation NoLoc; // no source location attribution available
static const bool InlineFalse = false; // namespace is not an inline namespace
static const bool InlineSpecifiedFalse = false; // function was not specified as inline
static const bool ExplicitFalse = false; // constructor was not specified as explicit
static const bool IsConstexprFalse = false; // function is not constexpr
static const bool VirtualFalse = false; // whether the base class is declared 'virtual'
static const bool BaseClassFalse = false; // whether the base class is declared as 'class' (vs. 'struct')
@ -733,6 +734,28 @@ void AssociateParametersToFunctionPrototype(
}
}
// Creates a public, non-explicit, non-inline-specified, non-constexpr
// constructor declaration for recordDecl.
//   context         - AST context used to build the function type and decl.
//   recordDecl      - record that owns the constructor; must not be null.
//   resultType      - result type used for the constructor's function type.
//   args            - parameter types of the constructor.
//   declarationName - constructor name to attach to the declaration.
//   isConst         - when true, the function type carries the const qualifier.
//   constructorDecl - receives the created declaration; must not be null.
//   tinfo           - receives the trivial type source info; must not be null.
// The declaration's lexical context is set to recordDecl, but it is NOT added
// to the record here; callers do that themselves.
static void CreateConstructorDeclaration(
    ASTContext &context, _In_ CXXRecordDecl *recordDecl, QualType resultType,
    ArrayRef<QualType> args, DeclarationName declarationName, bool isConst,
    _Out_ CXXConstructorDecl **constructorDecl, _Out_ TypeSourceInfo **tinfo) {
  DXASSERT_NOMSG(recordDecl != nullptr);
  DXASSERT_NOMSG(constructorDecl != nullptr);
  // tinfo is written through below, so validate it like the other out-param.
  DXASSERT_NOMSG(tinfo != nullptr);

  FunctionProtoType::ExtProtoInfo functionExtInfo;
  functionExtInfo.TypeQuals = isConst ? Qualifiers::Const : 0;
  QualType functionQT = context.getFunctionType(
      resultType, args, functionExtInfo, ArrayRef<ParameterModifier>());
  DeclarationNameInfo declNameInfo(declarationName, NoLoc);
  *tinfo = context.getTrivialTypeSourceInfo(functionQT, NoLoc);
  DXASSERT_NOMSG(*tinfo != nullptr);

  *constructorDecl = CXXConstructorDecl::Create(
      context, recordDecl, NoLoc, declNameInfo, functionQT, *tinfo,
      StorageClass::SC_None, ExplicitFalse, InlineSpecifiedFalse, IsConstexprFalse);
  DXASSERT_NOMSG(*constructorDecl != nullptr);
  (*constructorDecl)->setLexicalDeclContext(recordDecl);
  (*constructorDecl)->setAccess(AccessSpecifier::AS_public);
}
static void CreateObjectFunctionDeclaration(
ASTContext &context, _In_ CXXRecordDecl *recordDecl, QualType resultType,
ArrayRef<QualType> args, DeclarationName declarationName, bool isConst,
@ -800,7 +823,7 @@ CXXMethodDecl* hlsl::CreateObjectFunctionDeclarationWithParams(
CXXRecordDecl* hlsl::DeclareUIntTemplatedTypeWithHandle(
ASTContext& context, StringRef typeName, StringRef templateParamName) {
// template<uint kind> RayQuery/FeedbackTexture2D[Array] { ... }
// template<uint kind> FeedbackTexture2D[Array] { ... }
BuiltinTypeDeclBuilder typeDeclBuilder(context.getTranslationUnitDecl(), typeName);
typeDeclBuilder.addIntegerTemplateParam(templateParamName, context.UnsignedIntTy);
typeDeclBuilder.startDefinition();
@ -808,6 +831,24 @@ CXXRecordDecl* hlsl::DeclareUIntTemplatedTypeWithHandle(
return typeDeclBuilder.completeDefinition();
}
// Declares the built-in RayQuery template in the translation unit:
//   template<uint flags> RayQuery { uint h; RayQuery(); }
// and returns the completed record declaration.
CXXRecordDecl* hlsl::DeclareRayQueryType(ASTContext& context) {
  BuiltinTypeDeclBuilder builder(context.getTranslationUnitDecl(), "RayQuery");
  builder.addIntegerTemplateParam("flags", context.UnsignedIntTy);
  builder.startDefinition();
  // The 'h' field holds the handle.
  builder.addField("h", context.UnsignedIntTy);

  // Add a parameterless constructor that will be lowered to the intrinsic
  // that produces the RayQuery handle for this object.
  CXXRecordDecl *recordDecl = builder.getRecordDecl();
  CanQualType recordTy =
      recordDecl->getTypeForDecl()->getCanonicalTypeUnqualified();
  DeclarationName ctorName =
      context.DeclarationNames.getCXXConstructorName(recordTy);
  CXXConstructorDecl *ctorDecl = nullptr;
  TypeSourceInfo *ctorTypeInfo = nullptr;
  CreateConstructorDeclaration(context, recordDecl, context.VoidTy, {},
                               ctorName, false, &ctorDecl, &ctorTypeInfo);
  recordDecl->addDecl(ctorDecl);

  return builder.completeDefinition();
}
bool hlsl::IsIntrinsicOp(const clang::FunctionDecl *FD) {
return FD != nullptr && FD->hasAttr<HLSLIntrinsicAttr>();
}

Просмотреть файл

@ -583,6 +583,20 @@ bool GetHLSLSubobjectKind(clang::QualType type, DXIL::SubobjectKind &subobjectKi
return false;
}
// Returns true when the canonical form of `type` is a record type whose
// declaration is a class template specialization named "RayQuery".
bool IsHLSLRayQueryType(clang::QualType type) {
  clang::QualType canonTy = type.getCanonicalType();
  const RecordType *recordTy = dyn_cast<RecordType>(canonTy);
  if (!recordTy)
    return false;

  const ClassTemplateSpecializationDecl *specDecl =
      dyn_cast<ClassTemplateSpecializationDecl>(
          recordTy->getAsCXXRecordDecl());
  if (!specDecl)
    return false;

  return specDecl->getName() == "RayQuery";
}
QualType GetHLSLResourceResultType(QualType type) {
// Don't canonicalize the type as to not lose snorm in Buffer<snorm float>
const RecordType *RT = type->getAs<RecordType>();

Просмотреть файл

@ -1725,17 +1725,29 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
EmitBlock(loopBB);
}
// Find the end of the array.
llvm::Value *arrayEnd = Builder.CreateInBoundsGEP(arrayBegin, numElements,
"arrayctor.end");
// HLSL Change Begin: Loop on index instead of ptr
//// Find the end of the array.
//llvm::Value *arrayEnd = Builder.CreateInBoundsGEP(arrayBegin, numElements,
// "arrayctor.end");
// HLSL Change End
// Enter the loop, setting up a phi for the current location to initialize.
llvm::BasicBlock *entryBB = Builder.GetInsertBlock();
llvm::BasicBlock *loopBB = createBasicBlock("arrayctor.loop");
EmitBlock(loopBB);
llvm::PHINode *cur = Builder.CreatePHI(arrayBegin->getType(), 2,
"arrayctor.cur");
cur->addIncoming(arrayBegin, entryBB);
// HLSL Change Begin: Loop on index instead of ptr
//llvm::PHINode *cur = Builder.CreatePHI(arrayBegin->getType(), 2,
// "arrayctor.cur");
//cur->addIncoming(arrayBegin, entryBB);
llvm::PHINode *idx = Builder.CreatePHI(numElements->getType(), 2,
"arrayctor.idx");
idx->addIncoming(
llvm::ConstantInt::get(numElements->getType(), (uint64_t)0), entryBB);
llvm::Value *next = Builder.CreateAdd(idx,
llvm::ConstantInt::get(idx->getType(), (uint64_t)1), "arrayctor.next");
llvm::Value *cur = Builder.CreateInBoundsGEP(arrayBegin, {idx}, "arrayctor.cur");
// HLSL Change End
// Inside the loop body, emit the constructor call on the array element.
@ -1768,16 +1780,31 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
/*Delegating=*/false, cur, E);
}
// Go to the next element.
llvm::Value *next =
Builder.CreateInBoundsGEP(cur, llvm::ConstantInt::get(SizeTy, 1),
"arrayctor.next");
cur->addIncoming(next, Builder.GetInsertBlock());
// HLSL Change Begin: Loop on index instead of ptr
//// Go to the next element.
//llvm::Value *next =
// Builder.CreateInBoundsGEP(cur, llvm::ConstantInt::get(SizeTy, 1),
// "arrayctor.next");
//cur->addIncoming(next, Builder.GetInsertBlock());
idx->addIncoming(next, Builder.GetInsertBlock());
// HLSL Change End
// Check whether that's the end of the loop.
llvm::Value *done = Builder.CreateICmpEQ(next, arrayEnd, "arrayctor.done");
// HLSL Change Begin: Loop on index instead of ptr
//llvm::Value *done = Builder.CreateICmpEQ(next, arrayEnd, "arrayctor.done");
llvm::Value *done = Builder.CreateICmpEQ(next, numElements, "arrayctor.done");
// HLSL Change End
llvm::BasicBlock *contBB = createBasicBlock("arrayctor.cont");
Builder.CreateCondBr(done, contBB, loopBB);
llvm::TerminatorInst *TI = cast<llvm::TerminatorInst>( // HLSL Change, capture terminator
Builder.CreateCondBr(done, contBB, loopBB));
// HLSL Change Begin: force unroll
LoopAttributes loopAttr;
loopAttr.HlslUnrollPolicy = LoopAttributes::HlslForceUnroll;
LoopInfo loopInfo(loopBB, loopAttr);
TI->setMetadata("llvm.loop", loopInfo.getLoopID());
// HLSL Change End
// Patch the earlier check to skip over the loop.
if (zeroCheckBranch) zeroCheckBranch->setSuccessor(0, contBB);

Просмотреть файл

@ -5067,6 +5067,57 @@ static void CreateWriteEnabledStaticGlobals(llvm::Module *M,
}
}
// Translate RayQuery constructor. From:
//   %call = call %"RayQuery<flags>"* @<constructor>(%"RayQuery<flags>"* %ptr)
// To:
//   %handle = call i32 AllocateRayQuery(i32 <IntrinsicOp::IOP_AllocateRayQuery>, i32 %flags)
//   %gep = GEP %"RayQuery<flags>"* %ptr, 0, 0
//   store i32 %handle, i32* %gep
//   ; and replace uses of %call with %ptr
// Every matched constructor function is erased from the module once all of
// its calls have been rewritten.
void TranslateRayQueryConstructor(llvm::Module &M) {
// Pass 1: collect constructor functions first, so the function list is not
// modified while it is being iterated.
SmallVector<Function*, 4> Constructors;
for (auto &F : M.functions()) {
// Match templated RayQuery constructor instantiation by prefix and signature.
// It should be impossible to achieve the same signature from HLSL.
if (!F.getName().startswith("\01??0?$RayQuery@$"))
continue;
// Signature check: returns a pointer to a RayQuery type ...
llvm::Type *Ty = F.getReturnType();
if (!Ty->isPointerTy() || !dxilutil::IsHLSLRayQueryType(Ty->getPointerElementType()))
continue;
// ... and takes exactly one argument of that same pointer type.
if (F.arg_size() != 1 || Ty != F.arg_begin()->getType())
continue;
Constructors.emplace_back(&F);
}
// Pass 2: rewrite every call of each collected constructor.
for (auto pConstructorFunc : Constructors) {
llvm::IntegerType *i32Ty = llvm::Type::getInt32Ty(M.getContext());
llvm::ConstantInt *i32Zero = llvm::ConstantInt::get(i32Ty, (uint64_t)0, false);
// HL intrinsic signature: i32 (i32 opcode, i32 flags).
llvm::FunctionType *funcTy = llvm::FunctionType::get(i32Ty, {i32Ty, i32Ty}, false);
unsigned opcode = (unsigned)IntrinsicOp::IOP_AllocateRayQuery;
llvm::ConstantInt *opVal = llvm::ConstantInt::get(i32Ty, opcode, false);
Function *opFunc = GetOrCreateHLFunction(M, funcTy, HLOpcodeGroup::HLIntrinsic, opcode);
// Each iteration erases the call it handles, so keep taking the first
// remaining user until none is left.
while (!pConstructorFunc->user_empty()) {
Value *V = *pConstructorFunc->user_begin();
llvm::CallInst *CI = cast<CallInst>(V); // Must be call
llvm::Value *pThis = CI->getArgOperand(0);
llvm::StructType *pRQType = cast<llvm::StructType>(pThis->getType()->getPointerElementType());
// The struct annotation carries the RayQuery template argument; exactly one
// integral argument is expected.
DxilStructAnnotation *SA = M.GetHLModule().GetTypeSystem().GetStructAnnotation(pRQType);
DXASSERT(SA, "otherwise, could not find type annoation for RayQuery specialization");
DXASSERT(SA->GetNumTemplateArgs() == 1 && SA->GetTemplateArgAnnotation(0).IsIntegral(),
"otherwise, RayQuery has changed, or lacks template args");
// Emit the replacement at the position of the constructor call.
llvm::IRBuilder<> Builder(CI);
llvm::Value *rayFlags = Builder.getInt32(SA->GetTemplateArgAnnotation(0).GetIntegral());
llvm::Value *Call = Builder.CreateCall(opFunc, {opVal, rayFlags}, pThis->getName());
// Store the handle into field 0 of the RayQuery object.
llvm::Value *GEP = Builder.CreateInBoundsGEP(pThis, {i32Zero, i32Zero});
Builder.CreateStore(Call, GEP);
// Uses of the constructor call's result are redirected to the this pointer.
CI->replaceAllUsesWith(pThis);
CI->eraseFromParent();
}
pConstructorFunc->eraseFromParent();
}
}
void CGMSHLSLRuntime::FinishCodeGen() {
// Library don't have entry.
if (!m_bIsLib) {
@ -5138,6 +5189,9 @@ void CGMSHLSLRuntime::FinishCodeGen() {
// Create Global variable and type annotation for each CBuffer.
ConstructCBuffer(m_pHLModule, CBufferType, m_ConstVarAnnotationMap);
// Translate calls to RayQuery constructor into hl Allocate calls
TranslateRayQueryConstructor(*m_pHLModule->GetModule());
if (!m_bIsLib) {
// need this for "llvm.global_dtors"?
ProcessCtorFunctions(TheModule ,"llvm.global_ctors",

Просмотреть файл

@ -3317,7 +3317,7 @@ private:
break;
}
} else if (kind == AR_OBJECT_RAY_QUERY) {
recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "RayQuery", "flags");
recordDecl = DeclareRayQueryType(*m_context);
}
else if (kind == AR_OBJECT_FEEDBACKTEXTURE2D) {
recordDecl = DeclareUIntTemplatedTypeWithHandle(*m_context, "FeedbackTexture2D", "kind");
@ -6853,14 +6853,27 @@ void HLSLExternalSource::InitializeInitSequenceForHLSL(
DXASSERT_NOMSG(initSequence != nullptr);
// In HLSL there are no default initializers, eg float4x4 m();
if (Kind.getKind() == InitializationKind::IK_Default) {
return;
}
// Value initializers occur for temporaries with empty parens or braces.
if (Kind.getKind() == InitializationKind::IK_Value) {
m_sema->Diag(Kind.getLocation(), diag::err_hlsl_type_empty_init) << Entity.getType();
SilenceSequenceDiagnostics(initSequence);
// Except for RayQuery constructor (also handle InitializationKind::IK_Value)
if (Kind.getKind() == InitializationKind::IK_Default ||
Kind.getKind() == InitializationKind::IK_Value) {
QualType destBaseType = m_context->getBaseElementType(Entity.getType());
ArTypeObjectKind destBaseShape = GetTypeObjectKind(destBaseType);
if (destBaseShape == AR_TOBJ_OBJECT) {
const CXXRecordDecl *typeRecordDecl = destBaseType->getAsCXXRecordDecl();
int index = FindObjectBasicKindIndex(GetRecordDeclForBuiltInOrStruct(typeRecordDecl));
DXASSERT(index != -1, "otherwise can't find type we already determined was an object");
if (g_ArBasicKindsAsTypes[index] == AR_OBJECT_RAY_QUERY) {
CXXConstructorDecl *Constructor = *typeRecordDecl->ctor_begin();
initSequence->AddConstructorInitializationStep(
Constructor, AccessSpecifier::AS_public, destBaseType, false, false, false);
return;
}
}
// Value initializers occur for temporaries with empty parens or braces.
if (Kind.getKind() == InitializationKind::IK_Value) {
m_sema->Diag(Kind.getLocation(), diag::err_hlsl_type_empty_init) << Entity.getType();
SilenceSequenceDiagnostics(initSequence);
}
return;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -2,7 +2,7 @@
// CHECK: main
// After lowering, these would turn into multiple abs calls rather than a 4 x float
// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 94,
// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 95,
float4 main(float4 a : A) : SV_TARGET {
return abs(a*a.yxxx);

Просмотреть файл

@ -0,0 +1,34 @@
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
RaytracingAccelerationStructure RTAS;
// Helper that issues one TraceRayInline call on the RayQuery it is given,
// against the global RTAS, using the constant arguments 0 and 1. Those
// constants differ from the 1 and 2 passed by the direct call in main, so the
// expected-output patterns can tell the two call sites apart.
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
}
// Global with no initializer; main uses it as the dynamic index into the
// first row of the RayQuery array.
int C;
// Entry point. Declares a 2x3 array of RayQuery objects, then traces through
// element [1][2] twice (once via DoTrace, once directly) and through element
// [0][C] once via DoTrace. The patterns below expect an alloca of [6 x i32]
// and six allocateRayQuery calls, one per array element.
float main(RayDesc rayDesc : RAYDESC) : OUT {
// CHECK: %[[array:[^ ]+]] = alloca [6 x i32]
// Ideally, one for [1][2] statically indexed, and 3 for [0][C] dynamically indexed sub-array.
// But that would require 2d array optimization when one index is constant.
// CHECK: %[[RQ00:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ01:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ02:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ10:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ11:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ12:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery[2][3];
// Statically indexed element passed to the helper: the last allocation is used directly.
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ12]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
DoTrace(rayQuery[1][2], rayDesc);
// Same element traced directly, with arguments 1 and 2 instead of 0 and 1.
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ12]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
rayQuery[1][2].TraceRayInline(RTAS, 1, 2, rayDesc);
// Dynamically indexed element: the i32 handle is loaded from the array alloca.
// CHECK: %[[GEP:[^ ]+]] = getelementptr [6 x i32], [6 x i32]* %[[array]],
// CHECK: %[[load:[^ ]+]] = load i32, i32* %[[GEP]]
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[load]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
DoTrace(rayQuery[0][C], rayDesc);
return 0;
}

Просмотреть файл

@ -0,0 +1,28 @@
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
// CHECK: %[[RQ2:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ1:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// Additional allocations should have been cleaned up
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178,
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ2]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
RaytracingAccelerationStructure RTAS;
// Helper that issues one TraceRayInline call on the RayQuery it is given,
// against the global RTAS, using the constant arguments 0 and 1 (the direct
// call in main passes 1 and 2 instead).
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
}
int C;
// Entry point. Declares a two-element RayQuery array and traces through
// element [1] via DoTrace and directly, then through element [0] via DoTrace.
// Every index is a constant; the expected output at the top of this file
// permits exactly two allocateRayQuery calls before the traces.
float main(RayDesc rayDesc : RAYDESC) : OUT {
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery[2];
DoTrace(rayQuery[1], rayDesc);
rayQuery[1].TraceRayInline(RTAS, 1, 2, rayDesc);
DoTrace(rayQuery[0], rayDesc);
return 0;
}

Просмотреть файл

@ -0,0 +1,42 @@
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
RaytracingAccelerationStructure RTAS;
// We should eliminate these calls somehow in the future, but for now, that does not look like a legal optimization.
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
// CHECK: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
static RayQuery<0> g_rayQueryArray[4];
// g_rayQueryUnused should be optimized away
static RayQuery<0> g_rayQueryUnused;
// Entry point. Exercises copies of RayQuery objects through a static global
// array: rayQuery0b is stored into g_rayQueryArray[i] and then overwritten by
// rayQuery0a, the element is copied back out into rayQuery0c, and a second
// query with different flags is initialized from an explicitly constructed
// temporary.
void main(uint i : IDX, RayDesc rayDesc : RAYDESC) {
// CHECK: %[[rayQuery0a:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
RayQuery<0> rayQuery0a;
// rayQuery0b should be completely optimized away
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
RayQuery<0> rayQuery0b;
g_rayQueryArray[i] = rayQuery0b; // Stored here, then overwritten with rayQuery0a
g_rayQueryArray[i] = rayQuery0a;
// No separate allocation, just a handle copy
// optimizations should have eliminated load from global array
// CHECK-NOT: load
RayQuery<0> rayQuery0c = g_rayQueryArray[i];
// rayQuery0a is the one actually used here
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[rayQuery0a]],
rayQuery0c.TraceRayInline(RTAS, 1, 2, rayDesc);
// AllocateRayQuery occurs here, rather than next to allocas
// There should not be an extra allocate, since the above should allocate and copy
// CHECK: %[[rayQuery1c:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 1)
// CHECK-NOT: call i32 @dx.op.allocateRayQuery(i32 178, i32 0)
RayQuery<RAY_FLAG_FORCE_OPAQUE> rayQuery1c = RayQuery<RAY_FLAG_FORCE_OPAQUE>();
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[rayQuery1c]],
rayQuery1c.TraceRayInline(RTAS, 3, 4, rayDesc);
}

Просмотреть файл

@ -0,0 +1,23 @@
// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s
// CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false)
// CHECK: %[[RQ1:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: %[[RQ2:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513)
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ1]], %dx.types.Handle %[[RTAS]], i32 1, i32 2,
// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ2]], %dx.types.Handle %[[RTAS]], i32 0, i32 1,
RaytracingAccelerationStructure RTAS;
// Helper that issues one TraceRayInline call on the RayQuery it is given,
// against the global RTAS, using the constant arguments 0 and 1 (the direct
// call in main passes 1 and 2 instead).
void DoTrace(RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery, RayDesc rayDesc) {
rayQuery.TraceRayInline(RTAS, 0, 1, rayDesc);
}
// Entry point. Declares two independent RayQuery objects: rayQuery1 is traced
// via DoTrace and then directly, rayQuery2 is traced via DoTrace only. The
// expected output at the top of this file binds one allocateRayQuery result
// to each object and requires the traces to use the matching handle.
float main(RayDesc rayDesc : RAYDESC) : OUT {
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery1;
RayQuery<RAY_FLAG_FORCE_OPAQUE|RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES> rayQuery2;
DoTrace(rayQuery1, rayDesc);
rayQuery1.TraceRayInline(RTAS, 1, 2, rayDesc);
DoTrace(rayQuery2, rayDesc);
return 0;
}

Просмотреть файл

@ -321,6 +321,9 @@ void [[]] SetMeshOutputCounts(in uint numVertices, in uint numPrimitives);
// Amplification shader intrinsics:
void [[]] DispatchMesh(in uint threadGroupCountX, in uint threadGroupCountY, in uint threadGroupCountZ, in udt meshPayload);
// HL Op for allocating ray query object that default constructor uses
uint [[hidden]] AllocateRayQuery(in uint flags);
} namespace
namespace StreamMethods {

Просмотреть файл

@ -1506,7 +1506,7 @@ class db_dxil(object):
next_op_idx += 1
# RayQuery
self.add_dxil_op("AllocateRayQuery", next_op_idx, "AllocateRayQuery", "allocates space for RayQuery and return handle", "v", "rn", [
self.add_dxil_op("AllocateRayQuery", next_op_idx, "AllocateRayQuery", "allocates space for RayQuery and return handle", "v", "", [
db_dxil_param(0, "i32", "", "handle to RayQuery state"),
db_dxil_param(2, "u32", "constRayFlags", "Valid combination of RAY_FLAGS", is_const=True)])
next_op_idx += 1
@ -2572,7 +2572,7 @@ class db_hlsl_attribute(object):
class db_hlsl_intrinsic(object):
"An HLSL intrinsic declaration"
def __init__(self, name, idx, opname, params, ns, ns_idx, doc, ro, rn, unsigned_op, overload_idx):
def __init__(self, name, idx, opname, params, ns, ns_idx, doc, ro, rn, unsigned_op, overload_idx, hidden):
self.name = name # Function name
self.idx = idx # Unique number within namespace
self.opname = opname # D3D-style name
@ -2588,6 +2588,7 @@ class db_hlsl_intrinsic(object):
if unsigned_op != "":
self.unsigned_op = "%s_%s" % (id_prefix, unsigned_op)
self.overload_param_index = overload_idx # Parameter determines the overload type, -1 means ret type
self.hidden = hidden # Internal high-level op, not exposed to HLSL
self.key = ("%3d" % ns_idx) + "!" + name + "!" + ("%2d" % len(params)) + "!" + ("%3d" % idx) # Unique key
self.vulkanSpecific = ns.startswith("Vk") # Vulkan specific intrinsic - SPIRV change
@ -2824,6 +2825,7 @@ class db_hlsl(object):
readnone = False # Not read memory
unsigned_op = "" # Unsigned opcode if exist
overload_param_index = -1 # Parameter determines the overload type, -1 means ret type.
hidden = False
for a in attrs:
if (a == ""):
continue
@ -2833,6 +2835,10 @@ class db_hlsl(object):
if (a == "rn"):
readnone = True
continue
if (a == "hidden"):
hidden = True
continue
assign = a.split('=')
if (len(assign) != 2):
@ -2848,7 +2854,7 @@ class db_hlsl(object):
continue
assert False, "invalid attr %s" % (a)
return readonly, readnone, unsigned_op, overload_param_index
return readonly, readnone, unsigned_op, overload_param_index, hidden
current_namespace = None
for line in intrinsic_defs:
@ -2881,7 +2887,7 @@ class db_hlsl(object):
op = operand_match.group(1)
if not op:
op = name
readonly, readnone, unsigned_op, overload_param_index = process_attr(attr)
readonly, readnone, unsigned_op, overload_param_index, hidden = process_attr(attr)
# Add an entry for this intrinsic.
if bracket_cleanup_re.search(opts):
opts = bracket_cleanup_re.sub(r"<\1@\2>", opts)
@ -2905,7 +2911,7 @@ class db_hlsl(object):
# TODO: verify a single level of indirection
self.intrinsics.append(db_hlsl_intrinsic(
name, num_entries, op, args, current_namespace, ns_idx, "pending doc for " + name,
readonly, readnone, unsigned_op, overload_param_index))
readonly, readnone, unsigned_op, overload_param_index, hidden))
num_entries += 1
continue
assert False, "cannot parse line %s" % (line)

Просмотреть файл

@ -693,8 +693,13 @@ def get_hlsl_intrinsics():
ns_table += " {(UINT)%s::%s_%s, %s, %s, %d, %d, g_%s_Args%s},\n" % (opcode_namespace, id_prefix, i.name, str(i.readonly).lower(), str(i.readnone).lower(), i.overload_param_index,len(i.params), last_ns, arg_idx)
result += "static const HLSL_INTRINSIC_ARGUMENT g_%s_Args%s[] =\n{\n" % (last_ns, arg_idx)
for p in i.params:
name = p.name
if name == i.name and i.hidden:
# First parameter defines intrinsic name for parsing in HLSL.
# Prepend '$hidden$' for hidden intrinsic so it can't be used in HLSL.
name = "$hidden$" + name
result += " {\"%s\", %s, %s, %s, %s, %s, %s, %s},\n" % (
p.name, p.param_qual, p.template_id, p.template_list,
name, p.param_qual, p.template_id, p.template_list,
p.component_id, p.component_list, p.rows, p.cols)
result += "};\n\n"
arg_idx += 1