Fix some argument passing and groupshared bad codegen and crashes
- set address space for groupshared QualType and fix downstream effects Fix: - double LValue expression emit for in aggregate arguments - in agg param modifying caller's value instead of copy - groupshared matrix support in HLMatrixLower - groupshared base class member access - groupshared matrix member casting in class method Still in need of more fixes and tests: - incomplete array and auto dimensions from initializer Argument handling still needs overhaul. This fix retains old behavior, even when not quite correct to avoid worse regressions. Objects are not copied in, aggregate LValueToRValue cast expr is emitted as LValues instead of RValues because RValue emit path doesn't have HLSL changes necessary to handle certain cases, such as derived-to-base cast.
This commit is contained in:
Родитель
27a7bf4b0b
Коммит
b4bb5795cd
|
@ -104,6 +104,7 @@ namespace dxilutil {
|
|||
void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context);
|
||||
// Returns true if type contains HLSL Object type (resource)
|
||||
bool ContainsHLSLObjectType(llvm::Type *Ty);
|
||||
bool IsHLSLResourceType(llvm::Type *Ty);
|
||||
bool IsHLSLObjectType(llvm::Type *Ty);
|
||||
bool IsHLSLMatrixType(llvm::Type *Ty);
|
||||
bool IsSplat(llvm::ConstantDataVector *cdv);
|
||||
|
|
|
@ -396,16 +396,9 @@ llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
|
|||
return SkipAllocas(FindAllocaInsertionPt(F));
|
||||
}
|
||||
|
||||
bool IsHLSLObjectType(llvm::Type *Ty) {
|
||||
bool IsHLSLResourceType(llvm::Type *Ty) {
|
||||
if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
|
||||
StringRef name = ST->getName();
|
||||
// TODO: don't check names.
|
||||
if (name.startswith("dx.types.wave_t"))
|
||||
return true;
|
||||
|
||||
if (name.endswith("_slice_type"))
|
||||
return false;
|
||||
|
||||
name = name.ltrim("class.");
|
||||
name = name.ltrim("struct.");
|
||||
|
||||
|
@ -414,13 +407,6 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
|
|||
if (name == "SamplerComparisonState")
|
||||
return true;
|
||||
|
||||
if (name.startswith("TriangleStream<"))
|
||||
return true;
|
||||
if (name.startswith("PointStream<"))
|
||||
return true;
|
||||
if (name.startswith("LineStream<"))
|
||||
return true;
|
||||
|
||||
if (name.startswith("AppendStructuredBuffer<"))
|
||||
return true;
|
||||
if (name.startswith("ConsumeStructuredBuffer<"))
|
||||
|
@ -441,23 +427,53 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
|
|||
return true;
|
||||
if (name.startswith("StructuredBuffer<"))
|
||||
return true;
|
||||
if (name.startswith("Texture1D<"))
|
||||
|
||||
if (name.startswith("Texture")) {
|
||||
name = name.ltrim("Texture");
|
||||
if (name.startswith("1D<"))
|
||||
return true;
|
||||
if (name.startswith("Texture1DArray<"))
|
||||
if (name.startswith("1DArray<"))
|
||||
return true;
|
||||
if (name.startswith("Texture2D<"))
|
||||
if (name.startswith("2D<"))
|
||||
return true;
|
||||
if (name.startswith("Texture2DArray<"))
|
||||
if (name.startswith("2DArray<"))
|
||||
return true;
|
||||
if (name.startswith("Texture3D<"))
|
||||
if (name.startswith("3D<"))
|
||||
return true;
|
||||
if (name.startswith("TextureCube<"))
|
||||
if (name.startswith("Cube<"))
|
||||
return true;
|
||||
if (name.startswith("TextureCubeArray<"))
|
||||
if (name.startswith("CubeArray<"))
|
||||
return true;
|
||||
if (name.startswith("Texture2DMS<"))
|
||||
if (name.startswith("2DMS<"))
|
||||
return true;
|
||||
if (name.startswith("Texture2DMSArray<"))
|
||||
if (name.startswith("2DMSArray<"))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsHLSLObjectType(llvm::Type *Ty) {
|
||||
if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
|
||||
StringRef name = ST->getName();
|
||||
// TODO: don't check names.
|
||||
if (name.startswith("dx.types.wave_t"))
|
||||
return true;
|
||||
|
||||
if (name.endswith("_slice_type"))
|
||||
return false;
|
||||
|
||||
if (IsHLSLResourceType(Ty))
|
||||
return true;
|
||||
|
||||
name = name.ltrim("class.");
|
||||
name = name.ltrim("struct.");
|
||||
|
||||
if (name.startswith("TriangleStream<"))
|
||||
return true;
|
||||
if (name.startswith("PointStream<"))
|
||||
return true;
|
||||
if (name.startswith("LineStream<"))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -2330,9 +2330,78 @@ void HLMatrixLowerPass::DeleteDeadInsts() {
|
|||
m_inDeadInstsSet.clear();
|
||||
}
|
||||
|
||||
// Iterate users, tunnel through address space cast, and skip unused constant
|
||||
// users.
|
||||
class UserIter_TunnelAddrSpace_SkipUnusedConstantUser {
|
||||
private:
|
||||
Value::user_iterator UserIt;
|
||||
Value::user_iterator AddrSpaceCastUserIt;
|
||||
|
||||
public:
|
||||
User *U = nullptr;
|
||||
public:
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser() {}
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser(Value *V)
|
||||
{
|
||||
if (!V->user_empty())
|
||||
UserIt = V->user_begin();
|
||||
Next();
|
||||
}
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser(
|
||||
const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) = default;
|
||||
|
||||
User *Next() {
|
||||
U = nullptr;
|
||||
if (!AddrSpaceCastUserIt.atEnd()) {
|
||||
U = *(AddrSpaceCastUserIt++);
|
||||
return U;
|
||||
}
|
||||
while (!UserIt.atEnd()) {
|
||||
U = *(UserIt++);
|
||||
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
|
||||
if (CE->user_empty())
|
||||
continue;
|
||||
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
|
||||
AddrSpaceCastUserIt = CE->user_begin();
|
||||
U = *(AddrSpaceCastUserIt++);
|
||||
return U;
|
||||
}
|
||||
}
|
||||
return U;
|
||||
}
|
||||
U = nullptr;
|
||||
return U;
|
||||
}
|
||||
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser begin() { return *this; }
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser end() {
|
||||
return UserIter_TunnelAddrSpace_SkipUnusedConstantUser();
|
||||
}
|
||||
bool
|
||||
operator==(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) {
|
||||
return (U == other.U && UserIt == other.UserIt &&
|
||||
AddrSpaceCastUserIt == other.AddrSpaceCastUserIt);
|
||||
}
|
||||
bool
|
||||
operator!=(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) {
|
||||
return !(*this == (other));
|
||||
}
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser &operator++() {
|
||||
Next();
|
||||
return *this;
|
||||
}
|
||||
UserIter_TunnelAddrSpace_SkipUnusedConstantUser operator++(int) {
|
||||
auto tmp = *this;
|
||||
++*this;
|
||||
return tmp;
|
||||
}
|
||||
User *operator*() { return U; }
|
||||
User *operator->() { return U; }
|
||||
};
|
||||
|
||||
static bool OnlyUsedByMatrixLdSt(Value *V) {
|
||||
bool onlyLdSt = true;
|
||||
for (User *user : V->users()) {
|
||||
for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(V)) {
|
||||
if (isa<Constant>(user) && user->use_empty())
|
||||
continue;
|
||||
|
||||
|
@ -2417,7 +2486,7 @@ void HLMatrixLowerPass::runOnGlobalMatrixArray(GlobalVariable *GV) {
|
|||
HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, VecGV);
|
||||
}
|
||||
|
||||
for (User *U : GV->users()) {
|
||||
for (User *U : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
|
||||
Value *VecGEP = nullptr;
|
||||
// Must be GEP or GEPOperator.
|
||||
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
|
||||
|
@ -2522,7 +2591,7 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
|
|||
}
|
||||
vecGlobals[i] = EltGV;
|
||||
}
|
||||
for (User *user : GV->users()) {
|
||||
for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
|
||||
if (isa<Constant>(user) && user->use_empty())
|
||||
continue;
|
||||
CallInst *CI = cast<CallInst>(user);
|
||||
|
@ -2536,9 +2605,14 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
|
|||
// lower to array of scalar here.
|
||||
ArrayType *AT = ArrayType::get(vecTy->getVectorElementType(), vecTy->getVectorNumElements());
|
||||
Constant *InitVal = ConstantArray::get(AT, Elts);
|
||||
unsigned AddressSpace = GV->getType()->getAddressSpace();
|
||||
GlobalValue::LinkageTypes linkage = GV->getLinkage();
|
||||
GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
|
||||
GlobalVariable *arrayMat = new llvm::GlobalVariable(
|
||||
*M, AT, /*IsConstant*/ false, llvm::GlobalValue::InternalLinkage,
|
||||
/*InitVal*/ InitVal, GV->getName());
|
||||
*M, AT, /*IsConstant*/ false, linkage,
|
||||
/*InitVal*/ InitVal, GV->getName(),
|
||||
/*InsertBefore*/nullptr,
|
||||
TLMode, AddressSpace);
|
||||
// Add debug info.
|
||||
if (m_HasDbgInfo) {
|
||||
DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
|
||||
|
@ -2546,8 +2620,8 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
|
|||
arrayMat);
|
||||
}
|
||||
|
||||
for (auto U = GV->user_begin(); U != GV->user_end();) {
|
||||
Value *user = *(U++);
|
||||
for (User *U :UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
|
||||
Value *user = U;
|
||||
CallInst *CI = cast<CallInst>(user);
|
||||
HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
|
||||
if (group == HLOpcodeGroup::HLMatLoadStore) {
|
||||
|
|
|
@ -2442,8 +2442,8 @@ static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsi
|
|||
unsigned ptrSize = DL.getTypeAllocSize(Ty);
|
||||
// Size match, return current level.
|
||||
if (ptrSize == size) {
|
||||
// Not go deeper for matrix.
|
||||
if (dxilutil::IsHLSLMatrixType(Ty))
|
||||
// Do not go deeper for matrix or object.
|
||||
if (dxilutil::IsHLSLMatrixType(Ty) || dxilutil::IsHLSLObjectType(Ty))
|
||||
return level;
|
||||
// For struct, go deeper if size not change.
|
||||
// This will leave memcpy to deeper level when flatten.
|
||||
|
@ -4186,9 +4186,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
|
|||
|
||||
/// MarkEmptyStructUsers - Add instruction related to Empty struct to DeadInsts.
|
||||
void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) {
|
||||
for (User *U : V->users()) {
|
||||
MarkEmptyStructUsers(U, DeadInsts);
|
||||
UndefValue *undef = UndefValue::get(V->getType());
|
||||
for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) {
|
||||
Value *U = *(itU++);
|
||||
// Kill memcpy, set operands to undef for call and ret, and recurse
|
||||
if (MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
|
||||
DeadInsts.emplace_back(MC);
|
||||
} else if (CallInst *CI = dyn_cast<CallInst>(U)) {
|
||||
for (auto &operand : CI->operands()) {
|
||||
if (operand == V)
|
||||
operand.set(undef);
|
||||
}
|
||||
} else if (ReturnInst *Ret = dyn_cast<ReturnInst>(U)) {
|
||||
Ret->setOperand(0, undef);
|
||||
} else if (isa<Constant>(U) || isa<GetElementPtrInst>(U) ||
|
||||
isa<BitCastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) {
|
||||
// Recurse users
|
||||
MarkEmptyStructUsers(U, DeadInsts);
|
||||
} else {
|
||||
DXASSERT(false, "otherwise, recursing unexpected empty struct user");
|
||||
}
|
||||
}
|
||||
|
||||
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
||||
// Only need to add no use inst here.
|
||||
// DeleteDeadInst will delete everything.
|
||||
|
|
|
@ -171,7 +171,8 @@ llvm::Value *CodeGenFunction::GetAddressOfBaseClass(
|
|||
|
||||
// Get the base pointer type.
|
||||
llvm::Type *BasePtrTy =
|
||||
ConvertType((PathEnd[-1])->getType())->getPointerTo();
|
||||
ConvertType((PathEnd[-1])->getType())->getPointerTo(
|
||||
Value->getType()->getPointerAddressSpace()); // HLSL Change: match address space
|
||||
|
||||
QualType DerivedTy = getContext().getRecordType(Derived);
|
||||
CharUnits DerivedAlign = getContext().getTypeAlignInChars(DerivedTy);
|
||||
|
|
|
@ -1795,7 +1795,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg,
|
|||
}
|
||||
|
||||
LValue lv = MakeAddrLValue(DeclPtr, Ty, Align);
|
||||
if (IsScalar) {
|
||||
if (!getLangOpts().HLSL && IsScalar) { // HLSL Change: not ObjC
|
||||
Qualifiers qs = Ty.getQualifiers();
|
||||
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
|
||||
// We honor __attribute__((ns_consumed)) for types with lifetime.
|
||||
|
|
|
@ -3285,10 +3285,23 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
|
|||
LValue LV = EmitLValue(E->getSubExpr());
|
||||
QualType ToType = getContext().getLValueReferenceType(E->getType());
|
||||
|
||||
llvm::Value *FromValue = LV.getAddress();
|
||||
llvm::Type *FromTy = FromValue->getType();
|
||||
llvm::Type *RetTy = ConvertType(ToType);
|
||||
// type not changed, LValueToRValue, CStyleCast may go this path
|
||||
if (LV.getAddress()->getType() == RetTy)
|
||||
if (FromTy == RetTy) {
|
||||
return LV;
|
||||
// If only address space changed, add address space cast
|
||||
}
|
||||
if (FromTy->getPointerAddressSpace() != RetTy->getPointerAddressSpace()) {
|
||||
llvm::Type *ConvertedFromTy = llvm::PointerType::get(
|
||||
FromTy->getPointerElementType(), RetTy->getPointerAddressSpace());
|
||||
assert(ConvertedFromTy == RetTy &&
|
||||
"otherwise, more than just address space changing in one step");
|
||||
llvm::Value *cast =
|
||||
Builder.CreateAddrSpaceCast(FromValue, ConvertedFromTy);
|
||||
return MakeAddrLValue(cast, ToType);
|
||||
}
|
||||
llvm::Value *cast = CGM.getHLSLRuntime().EmitHLSLMatrixOperationCall(*this, E, RetTy, { LV.getAddress() });
|
||||
return MakeAddrLValue(cast, ToType);
|
||||
}
|
||||
|
|
|
@ -7010,9 +7010,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
const ParmVarDecl *Param = FD->getParamDecl(i);
|
||||
const Expr *Arg = E->getArg(i+ArgsToSkip);
|
||||
QualType ParamTy = Param->getType().getNonReferenceType();
|
||||
bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
|
||||
bool isAggregateType = !isObject &&
|
||||
(ParamTy->isArrayType() || ParamTy->isRecordType()) &&
|
||||
!hlsl::IsHLSLVecMatType(ParamTy);
|
||||
|
||||
bool EmitRValueAgg = false;
|
||||
bool RValOnRef = false;
|
||||
if (!Param->isModifierOut()) {
|
||||
if (!ParamTy->isAggregateType() || hlsl::IsHLSLMatType(ParamTy)) {
|
||||
if (!isAggregateType && !isObject) {
|
||||
if (Arg->isRValue() && Param->getType()->isReferenceType()) {
|
||||
// RValue on a reference type.
|
||||
if (const CStyleCastExpr *cCast = dyn_cast<CStyleCastExpr>(Arg)) {
|
||||
|
@ -7035,27 +7041,61 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
} else {
|
||||
continue;
|
||||
}
|
||||
} else if (isAggregateType) {
|
||||
// aggregate in-only - emit RValue, unless LValueToRValue cast
|
||||
EmitRValueAgg = true;
|
||||
if (const ImplicitCastExpr *cast =
|
||||
dyn_cast<ImplicitCastExpr>(Arg)) {
|
||||
if (cast->getCastKind() == CastKind::CK_LValueToRValue) {
|
||||
EmitRValueAgg = false;
|
||||
}
|
||||
}
|
||||
|
||||
// get original arg
|
||||
LValue argLV = CGF.EmitLValue(Arg);
|
||||
|
||||
if (!Param->isModifierOut() && !RValOnRef) {
|
||||
bool isDefaultAddrSpace = true;
|
||||
if (argLV.isSimple()) {
|
||||
isDefaultAddrSpace =
|
||||
argLV.getAddress()->getType()->getPointerAddressSpace() ==
|
||||
DXIL::kDefaultAddrSpace;
|
||||
}
|
||||
bool isHLSLIntrinsic = false;
|
||||
if (const FunctionDecl *Callee = E->getDirectCallee()) {
|
||||
isHLSLIntrinsic = Callee->hasAttr<HLSLIntrinsicAttr>();
|
||||
}
|
||||
// Copy in arg which is not default address space and not on hlsl intrinsic.
|
||||
if (isDefaultAddrSpace || isHLSLIntrinsic)
|
||||
} else {
|
||||
// Must be object
|
||||
DXASSERT(isObject, "otherwise, flow condition changed, breaking assumption");
|
||||
// in-only objects should be skipped to preserve previous behavior.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip unbounded array, since we cannot preserve copy-in copy-out
|
||||
// semantics for these.
|
||||
if (ParamTy->isIncompleteArrayType()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!Param->isModifierOut() && !RValOnRef) {
|
||||
// No need to copy arg to in-only param for hlsl intrinsic.
|
||||
if (const FunctionDecl *Callee = E->getDirectCallee()) {
|
||||
if (Callee->hasAttr<HLSLIntrinsicAttr>())
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// get original arg
|
||||
// FIXME: This will not emit in correct argument order with the other
|
||||
// arguments. This should be integrated into
|
||||
// CodeGenFunction::EmitCallArg if possible.
|
||||
RValue argRV; // emit this if aggregate arg on in-only param
|
||||
LValue argLV; // otherwise, we may emit this
|
||||
llvm::Value *argAddr = nullptr;
|
||||
QualType argType = Arg->getType();
|
||||
CharUnits argAlignment;
|
||||
if (EmitRValueAgg) {
|
||||
argRV = CGF.EmitAnyExprToTemp(Arg);
|
||||
argAddr = argRV.getAggregateAddr(); // must be alloca
|
||||
argAlignment = CharUnits::fromQuantity(cast<AllocaInst>(argAddr)->getAlignment());
|
||||
argLV = LValue::MakeAddr(argAddr, ParamTy, argAlignment, CGF.getContext());
|
||||
} else {
|
||||
argLV = CGF.EmitLValue(Arg);
|
||||
if (argLV.isSimple())
|
||||
argAddr = argLV.getAddress();
|
||||
argType = argLV.getType(); // TBD: Can this be different than Arg->getType()?
|
||||
argAlignment = argLV.getAlignment();
|
||||
}
|
||||
// After emit Arg, we must update the argList[i],
|
||||
// otherwise we get double emit of the expression.
|
||||
|
||||
// create temp Var
|
||||
VarDecl *tmpArg =
|
||||
|
@ -7065,17 +7105,26 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
CGF.getContext().getTrivialTypeSourceInfo(ParamTy),
|
||||
StorageClass::SC_Auto);
|
||||
|
||||
bool isEmptyAggregate = false;
|
||||
if (isAggregateType) {
|
||||
DXASSERT(argAddr, "should be RV or simple LV");
|
||||
llvm::Type *ElTy = argAddr->getType()->getPointerElementType();
|
||||
while (ElTy->isArrayTy())
|
||||
ElTy = ElTy->getArrayElementType();
|
||||
if (llvm::StructType *ST = dyn_cast<StructType>(ElTy)) {
|
||||
DxilStructAnnotation *SA = m_pHLModule->GetTypeSystem().GetStructAnnotation(ST);
|
||||
isEmptyAggregate = SA && SA->IsEmptyStruct();
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate type will be indirect param convert to pointer type.
|
||||
// So don't update to ReferenceType, use RValue for it.
|
||||
bool isAggregateType = (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
|
||||
!hlsl::IsHLSLVecMatType(ParamTy);
|
||||
|
||||
const DeclRefExpr *tmpRef = DeclRefExpr::Create(
|
||||
CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), tmpArg,
|
||||
/*enclosing*/ false, tmpArg->getLocation(), ParamTy,
|
||||
isAggregateType ? VK_RValue : VK_LValue);
|
||||
(isAggregateType || isObject) ? VK_RValue : VK_LValue);
|
||||
|
||||
// update the arg
|
||||
// must update the arg, since we did emit Arg, else we get double emit.
|
||||
argList[i] = tmpRef;
|
||||
|
||||
// create alloc for the tmp arg
|
||||
|
@ -7090,7 +7139,12 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
// add it to local decl map
|
||||
TmpArgMap(tmpArg, tmpArgAddr);
|
||||
|
||||
LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argLV.getAlignment(),
|
||||
// If param is empty, copy in/out will just create problems.
|
||||
// No copy will result in undef, which is fine.
|
||||
if (isEmptyAggregate)
|
||||
continue;
|
||||
|
||||
LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argAlignment,
|
||||
CGF.getContext());
|
||||
|
||||
// save for cast after call
|
||||
|
@ -7099,22 +7153,18 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
castArgList.emplace_back(argLV);
|
||||
}
|
||||
|
||||
bool isObject = dxilutil::IsHLSLObjectType(
|
||||
tmpArgAddr->getType()->getPointerElementType());
|
||||
|
||||
// cast before the call
|
||||
if (Param->isModifierIn() &&
|
||||
// Don't copy object
|
||||
!isObject) {
|
||||
QualType ArgTy = Arg->getType();
|
||||
Value *outVal = nullptr;
|
||||
bool isAggregateTy = ParamTy->isAggregateType() && !IsHLSLVecMatType(ParamTy);
|
||||
if (!isAggregateTy) {
|
||||
if (!isAggregateType) {
|
||||
if (!IsHLSLMatType(ParamTy)) {
|
||||
RValue outRVal = CGF.EmitLoadOfLValue(argLV, SourceLocation());
|
||||
outVal = outRVal.getScalarVal();
|
||||
} else {
|
||||
Value *argAddr = argLV.getAddress();
|
||||
DXASSERT(argAddr, "should be RV or simple LV");
|
||||
outVal = EmitHLSLMatrixLoad(CGF, argAddr, ArgTy);
|
||||
}
|
||||
|
||||
|
@ -7124,15 +7174,16 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
|
|||
EmitHLSLMatrixStore(CGF, castVal, tmpArgAddr, ParamTy);
|
||||
}
|
||||
else {
|
||||
Value *castVal = ConvertScalarOrVector(CGF, outVal, argLV.getType(), tmpLV.getType());
|
||||
castVal = CGF.EmitToMemory(castVal, tmpLV.getType());
|
||||
Value *castVal = ConvertScalarOrVector(CGF, outVal, argType, ParamTy);
|
||||
castVal = CGF.EmitToMemory(castVal, ParamTy);
|
||||
CGF.Builder.CreateStore(castVal, tmpArgAddr);
|
||||
}
|
||||
} else {
|
||||
DXASSERT(argAddr, "should be RV or simple LV");
|
||||
SmallVector<Value *, 4> idxList;
|
||||
EmitHLSLAggregateCopy(CGF, argLV.getAddress(), tmpLV.getAddress(),
|
||||
EmitHLSLAggregateCopy(CGF, argAddr, tmpArgAddr,
|
||||
idxList, ArgTy, ParamTy,
|
||||
argLV.getAddress()->getType());
|
||||
argAddr->getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1811,11 +1811,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
|
|||
|
||||
// Make sure the result is of the correct type.
|
||||
if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) {
|
||||
// HLSL Change Begins
|
||||
// TODO: do we put address space in type?
|
||||
if (LangOpts.HLSL) return Entry;
|
||||
else
|
||||
// HLSL Change Ends
|
||||
return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty);
|
||||
}
|
||||
|
||||
|
@ -1869,7 +1864,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
|
|||
GV->setSection(".cp.rodata");
|
||||
}
|
||||
|
||||
if (AddrSpace != Ty->getAddressSpace() && !LangOpts.HLSL) // HLSL Change -TODO: do we put address space in type?
|
||||
if (AddrSpace != Ty->getAddressSpace())
|
||||
return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
|
||||
|
||||
|
||||
|
|
|
@ -10806,6 +10806,9 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
|
|||
case AttributeList::AT_HLSLGroupShared:
|
||||
declAttr = ::new (S.Context) HLSLGroupSharedAttr(A.getRange(), S.Context,
|
||||
A.getAttributeSpellingListIndex());
|
||||
if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
|
||||
VD->setType(S.Context.getAddrSpaceQualType(VD->getType(), DXIL::kTGSMAddrSpace));
|
||||
}
|
||||
break;
|
||||
case AttributeList::AT_HLSLUniform:
|
||||
declAttr = ::new (S.Context) HLSLUniformAttr(A.getRange(), S.Context,
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
|
||||
|
||||
// Make sure nested empty struct works. Also test related paths such as
|
||||
// derived, multi-dim array in constant buffer, and argument passing.
|
||||
// CHECK: main
|
||||
|
||||
struct KillerStruct {};
|
||||
|
||||
struct InnerStruct {
|
||||
KillerStruct s;
|
||||
};
|
||||
|
||||
struct OuterStruct {
|
||||
InnerStruct s;
|
||||
};
|
||||
|
||||
class Derived : OuterStruct {
|
||||
InnerStruct s2;
|
||||
};
|
||||
|
||||
cbuffer Params_cbuffer : register(b0) {
|
||||
Derived constants[2][3];
|
||||
};
|
||||
|
||||
float4 foo(Derived s) { return (float4)0; }
|
||||
|
||||
float4 main() : SV_POSITION {
|
||||
return foo(constants[1][2]);
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
|
||||
|
||||
// This tests cast of derived to base when derived is groupshared.
|
||||
// Different use cases can hit different code paths, hence the variety of
|
||||
// uses here:
|
||||
// - calling base method
|
||||
// - vector element assignment on base member
|
||||
// - casting to base and passing to function
|
||||
// The barrier and write to RWBuf prevents optimizations from eliminating
|
||||
// groupshared use, considering this dead-code, or detecting a race condition.
|
||||
|
||||
// CHECK: @[[gs0:.+]] = addrspace(3) global i32 undef
|
||||
// CHECK: @[[gs1:.+]] = addrspace(3) global i32 undef
|
||||
// CHECK: @[[gs2:.+]] = addrspace(3) global i32 undef
|
||||
// CHECK: store i32 1, i32 addrspace(3)* @[[gs0]], align 4
|
||||
// CHECK: store i32 2, i32 addrspace(3)* @[[gs1]], align 4
|
||||
// CHECK: store i32 3, i32 addrspace(3)* @[[gs2]], align 4
|
||||
|
||||
// CHECK: %[[l0:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs0]], align 4
|
||||
// CHECK: %[[l1:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs1]], align 4
|
||||
// CHECK: %[[l2:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs2]], align 4
|
||||
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.+}}, i32 %{{.+}}, i32 undef, i32 %[[l0]], i32 %[[l1]], i32 %[[l2]], i32 undef, i8 7)
|
||||
|
||||
|
||||
class Base {
|
||||
uint3 u;
|
||||
void set_u_y(uint value) { u.y = value; }
|
||||
};
|
||||
class Derived : Base {
|
||||
float bar;
|
||||
};
|
||||
|
||||
groupshared Derived gs_derived;
|
||||
RWByteAddressBuffer RWBuf;
|
||||
|
||||
void UpdateBase_z(inout Base b, uint value) {
|
||||
b.u.z = value;
|
||||
}
|
||||
|
||||
[numthreads(2, 1, 1)]
|
||||
void main(uint3 groupThreadID: SV_GroupThreadID) {
|
||||
if (groupThreadID.x == 0) {
|
||||
gs_derived.u.x = 1;
|
||||
gs_derived.set_u_y(2);
|
||||
UpdateBase_z((Base)gs_derived, 3);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint addr = groupThreadID.x * 4;
|
||||
RWBuf.Store3(addr, gs_derived.u);
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
// RUN: %dxc -E main -T cs_6_0 -Zpc %s | FileCheck %s
|
||||
|
||||
// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
|
||||
// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
|
||||
// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
|
||||
|
||||
// CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1)
|
||||
// CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
|
||||
// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
|
||||
|
||||
// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
|
||||
// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
|
||||
// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
|
||||
// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
|
||||
|
||||
// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
|
||||
// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
|
||||
|
||||
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
|
||||
|
||||
float4 rows[4];
|
||||
|
||||
void set_row(inout float4 row, uint i) {
|
||||
row = rows[i];
|
||||
}
|
||||
|
||||
class Obj {
|
||||
float4x4 mat;
|
||||
void set() {
|
||||
set_row(mat[0], 0);
|
||||
set_row(mat[1], 1);
|
||||
set_row(mat[2], 2);
|
||||
set_row(mat[3], 3);
|
||||
}
|
||||
};
|
||||
|
||||
RWByteAddressBuffer RWBuf;
|
||||
groupshared Obj obj;
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint3 groupThreadID: SV_GroupThreadID) {
|
||||
if (groupThreadID.x == 0) {
|
||||
obj.set();
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
float4 row = obj.mat[groupThreadID.x];
|
||||
uint addr = groupThreadID.x * 4;
|
||||
RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s
|
||||
|
||||
// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
|
||||
// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
|
||||
// CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
|
||||
|
||||
// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
|
||||
// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
|
||||
|
||||
// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
|
||||
// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16
|
||||
// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
|
||||
// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
|
||||
|
||||
// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
|
||||
// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
|
||||
|
||||
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)
|
||||
|
||||
float4 rows[4];
|
||||
|
||||
void set_row(inout float4 row, uint i) {
|
||||
row = rows[i];
|
||||
}
|
||||
|
||||
class Obj {
|
||||
float4x4 mat;
|
||||
void set() {
|
||||
set_row(mat[0], 0);
|
||||
set_row(mat[1], 1);
|
||||
set_row(mat[2], 2);
|
||||
set_row(mat[3], 3);
|
||||
}
|
||||
};
|
||||
|
||||
RWByteAddressBuffer RWBuf;
|
||||
groupshared Obj obj;
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(uint3 groupThreadID: SV_GroupThreadID) {
|
||||
if (groupThreadID.x == 0) {
|
||||
obj.set();
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
float4 row = obj.mat[groupThreadID.x];
|
||||
uint addr = groupThreadID.x * 4;
|
||||
RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
|
||||
}
|
|
@ -17,7 +17,7 @@ A a;
|
|||
|
||||
static A a2;
|
||||
|
||||
void set(A aa) {
|
||||
void set(out A aa) {
|
||||
aa = a;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
|
||||
|
||||
// Verify that modificaion of in-only struct parameter does not modify
|
||||
// the value passed in by the caller.
|
||||
|
||||
// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
|
||||
// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
|
||||
// CHECK-DAG: [[o1:%[^ ]*]] = fmul fast float [[p]], [[f]]
|
||||
// CHECK-DAG: [[ret:%[^ ]*]] = fadd fast float [[o1]], [[p]]
|
||||
// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])
|
||||
|
||||
struct PayloadStruct {
|
||||
float Color;
|
||||
};
|
||||
|
||||
PayloadStruct MulPayload(in PayloadStruct Payload, in float x)
|
||||
{
|
||||
Payload.Color *= x;
|
||||
return Payload;
|
||||
}
|
||||
|
||||
void main(PayloadStruct p : Payload,
|
||||
float f : INPUT,
|
||||
out PayloadStruct o : SV_Target) {
|
||||
|
||||
o = MulPayload(p, f);
|
||||
o.Color += p.Color;
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
|
||||
|
||||
// Verify that passing struct result of call as arg to another call does not
|
||||
// generate extra call.
|
||||
|
||||
// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
|
||||
// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
|
||||
// CHECK-DAG: [[factor:%[^ ]*]] = fmul fast float [[f]], 2.000000e+00
|
||||
// CHECK-DAG: [[factor2:%[^ ]*]] = fadd fast float [[factor]], 1.000000e+00
|
||||
// CHECK-DAG: [[ret:%[^ ]*]] = fmul fast float [[factor2]], [[p]]
|
||||
// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])
|
||||
|
||||
struct PayloadStruct {
|
||||
float Color;
|
||||
};
|
||||
|
||||
static float factor = 1.0;
|
||||
|
||||
PayloadStruct MulPayload(in PayloadStruct Payload)
|
||||
{
|
||||
Payload.Color *= factor;
|
||||
factor += 1.0;
|
||||
return Payload;
|
||||
}
|
||||
|
||||
PayloadStruct AddPayload(in PayloadStruct Payload0, in PayloadStruct Payload1)
|
||||
{
|
||||
Payload0.Color += Payload1.Color;
|
||||
return Payload0;
|
||||
}
|
||||
|
||||
void main(PayloadStruct p : Payload,
|
||||
float f : INPUT,
|
||||
out PayloadStruct OutputPayload : SV_Target) {
|
||||
factor = f;
|
||||
OutputPayload = AddPayload(MulPayload(p),
|
||||
MulPayload(p));
|
||||
}
|
|
@ -1,7 +1,8 @@
|
|||
// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
|
||||
|
||||
// Make sure function call on external function has correct type.
|
||||
// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* nonnull {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
|
||||
// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
|
||||
|
||||
struct T {
|
||||
float a;
|
||||
float b;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
// Make sure function call on external function has correct type.
|
||||
|
||||
// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull %tmp) #2
|
||||
// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull {{.*}}) #2
|
||||
|
||||
struct T {
|
||||
float a;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
|
||||
|
||||
// Make sure empty struct arg works.
|
||||
// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* %t)
|
||||
// Make sure empty struct arg is replaced with undef.
|
||||
// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* undef)
|
||||
|
||||
struct T {
|
||||
};
|
||||
|
|
Загрузка…
Ссылка в новой задаче