Fix some argument passing and groupshared bad codegen and crashes

- set address space for groupshared QualType and fix downstream effects
Fix:
- double LValue expression emit for in aggregate arguments
- in agg param modifying caller's value instead of copy
- groupshared matrix support in HLMatrixLower
- groupshared base class member access
- groupshared matrix member casting in class method

Still in need of more fixes and tests:
- incomplete array and auto dimensions from initializer

Argument handling still needs an overhaul. This fix retains the old behavior,
even when not quite correct, to avoid worse regressions. Objects are not copied
in, and aggregate LValueToRValue cast expressions are emitted as LValues instead
of RValues, because the RValue emit path doesn't have the HLSL changes necessary
to handle certain cases, such as derived-to-base casts.
This commit is contained in:
Tex Riddell 2019-02-19 18:37:27 -08:00
Родитель 27a7bf4b0b
Коммит b4bb5795cd
20 изменённых файлов: 499 добавлений и 83 удалений

Просмотреть файл

@ -104,6 +104,7 @@ namespace dxilutil {
void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context); void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context);
// Returns true if type contains HLSL Object type (resource) // Returns true if type contains HLSL Object type (resource)
bool ContainsHLSLObjectType(llvm::Type *Ty); bool ContainsHLSLObjectType(llvm::Type *Ty);
bool IsHLSLResourceType(llvm::Type *Ty);
bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty);
bool IsHLSLMatrixType(llvm::Type *Ty); bool IsHLSLMatrixType(llvm::Type *Ty);
bool IsSplat(llvm::ConstantDataVector *cdv); bool IsSplat(llvm::ConstantDataVector *cdv);

Просмотреть файл

@ -396,16 +396,9 @@ llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) {
return SkipAllocas(FindAllocaInsertionPt(F)); return SkipAllocas(FindAllocaInsertionPt(F));
} }
bool IsHLSLObjectType(llvm::Type *Ty) { bool IsHLSLResourceType(llvm::Type *Ty) {
if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) { if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
StringRef name = ST->getName(); StringRef name = ST->getName();
// TODO: don't check names.
if (name.startswith("dx.types.wave_t"))
return true;
if (name.endswith("_slice_type"))
return false;
name = name.ltrim("class."); name = name.ltrim("class.");
name = name.ltrim("struct."); name = name.ltrim("struct.");
@ -414,13 +407,6 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
if (name == "SamplerComparisonState") if (name == "SamplerComparisonState")
return true; return true;
if (name.startswith("TriangleStream<"))
return true;
if (name.startswith("PointStream<"))
return true;
if (name.startswith("LineStream<"))
return true;
if (name.startswith("AppendStructuredBuffer<")) if (name.startswith("AppendStructuredBuffer<"))
return true; return true;
if (name.startswith("ConsumeStructuredBuffer<")) if (name.startswith("ConsumeStructuredBuffer<"))
@ -441,23 +427,53 @@ bool IsHLSLObjectType(llvm::Type *Ty) {
return true; return true;
if (name.startswith("StructuredBuffer<")) if (name.startswith("StructuredBuffer<"))
return true; return true;
if (name.startswith("Texture1D<"))
if (name.startswith("Texture")) {
name = name.ltrim("Texture");
if (name.startswith("1D<"))
return true; return true;
if (name.startswith("Texture1DArray<")) if (name.startswith("1DArray<"))
return true; return true;
if (name.startswith("Texture2D<")) if (name.startswith("2D<"))
return true; return true;
if (name.startswith("Texture2DArray<")) if (name.startswith("2DArray<"))
return true; return true;
if (name.startswith("Texture3D<")) if (name.startswith("3D<"))
return true; return true;
if (name.startswith("TextureCube<")) if (name.startswith("Cube<"))
return true; return true;
if (name.startswith("TextureCubeArray<")) if (name.startswith("CubeArray<"))
return true; return true;
if (name.startswith("Texture2DMS<")) if (name.startswith("2DMS<"))
return true; return true;
if (name.startswith("Texture2DMSArray<")) if (name.startswith("2DMSArray<"))
return true;
}
}
return false;
}
bool IsHLSLObjectType(llvm::Type *Ty) {
if (llvm::StructType *ST = dyn_cast<llvm::StructType>(Ty)) {
StringRef name = ST->getName();
// TODO: don't check names.
if (name.startswith("dx.types.wave_t"))
return true;
if (name.endswith("_slice_type"))
return false;
if (IsHLSLResourceType(Ty))
return true;
name = name.ltrim("class.");
name = name.ltrim("struct.");
if (name.startswith("TriangleStream<"))
return true;
if (name.startswith("PointStream<"))
return true;
if (name.startswith("LineStream<"))
return true; return true;
} }
return false; return false;

Просмотреть файл

@ -2330,9 +2330,78 @@ void HLMatrixLowerPass::DeleteDeadInsts() {
m_inDeadInstsSet.clear(); m_inDeadInstsSet.clear();
} }
// Iterate users, tunnel through address space cast, and skip unused constant
// users.
//
// A groupshared global may be referenced through a constant addrspacecast
// expression, so plain Value::users() would yield the ConstantExpr rather
// than the real instruction users.  This forward-iterator-like helper
// descends one level into addrspacecast ConstantExprs, returning their
// users instead, and skips ConstantExpr users that have no users of their
// own.  It supports range-for via begin()/end() below.
class UserIter_TunnelAddrSpace_SkipUnusedConstantUser {
private:
  Value::user_iterator UserIt;              // direct users of the root value
  Value::user_iterator AddrSpaceCastUserIt; // users of a tunneled addrspacecast
public:
  // Current user; nullptr means the iterator is the end sentinel.
  User *U = nullptr;
public:
  // Default constructor produces the end sentinel.
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser() {}
  // Begin iterating the users of V; positions on the first user, if any.
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser(Value *V)
  {
    if (!V->user_empty())
      UserIt = V->user_begin();
    Next();
  }
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser(
      const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) = default;
  // Advance to the next user, tunneling through addrspacecast constant
  // expressions and skipping unused ConstantExpr users.  Returns the new
  // current user, or nullptr when iteration is exhausted.
  User *Next() {
    U = nullptr;
    // First drain the users of a previously tunneled addrspacecast.
    if (!AddrSpaceCastUserIt.atEnd()) {
      U = *(AddrSpaceCastUserIt++);
      return U;
    }
    while (!UserIt.atEnd()) {
      U = *(UserIt++);
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
        // Skip constant users that themselves have no users.
        if (CE->user_empty())
          continue;
        if (CE->getOpcode() == Instruction::AddrSpaceCast) {
          // Tunnel: yield the cast's users rather than the cast itself.
          AddrSpaceCastUserIt = CE->user_begin();
          U = *(AddrSpaceCastUserIt++);
          return U;
        }
      }
      // Non-constant user, or a constant user other than addrspacecast:
      // yield it directly.
      return U;
    }
    U = nullptr;
    return U;
  }
  // Range-for support: a copy of this iterator is begin(); a
  // default-constructed sentinel is end().
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser begin() { return *this; }
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser end() {
    return UserIter_TunnelAddrSpace_SkipUnusedConstantUser();
  }
  bool
  operator==(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) {
    return (U == other.U && UserIt == other.UserIt &&
            AddrSpaceCastUserIt == other.AddrSpaceCastUserIt);
  }
  bool
  operator!=(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) {
    return !(*this == (other));
  }
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser &operator++() {
    Next();
    return *this;
  }
  UserIter_TunnelAddrSpace_SkipUnusedConstantUser operator++(int) {
    auto tmp = *this;
    ++*this;
    return tmp;
  }
  User *operator*() { return U; }
  User *operator->() { return U; }
};
static bool OnlyUsedByMatrixLdSt(Value *V) { static bool OnlyUsedByMatrixLdSt(Value *V) {
bool onlyLdSt = true; bool onlyLdSt = true;
for (User *user : V->users()) { for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(V)) {
if (isa<Constant>(user) && user->use_empty()) if (isa<Constant>(user) && user->use_empty())
continue; continue;
@ -2417,7 +2486,7 @@ void HLMatrixLowerPass::runOnGlobalMatrixArray(GlobalVariable *GV) {
HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, VecGV); HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, VecGV);
} }
for (User *U : GV->users()) { for (User *U : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
Value *VecGEP = nullptr; Value *VecGEP = nullptr;
// Must be GEP or GEPOperator. // Must be GEP or GEPOperator.
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
@ -2522,7 +2591,7 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
} }
vecGlobals[i] = EltGV; vecGlobals[i] = EltGV;
} }
for (User *user : GV->users()) { for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
if (isa<Constant>(user) && user->use_empty()) if (isa<Constant>(user) && user->use_empty())
continue; continue;
CallInst *CI = cast<CallInst>(user); CallInst *CI = cast<CallInst>(user);
@ -2536,9 +2605,14 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
// lower to array of scalar here. // lower to array of scalar here.
ArrayType *AT = ArrayType::get(vecTy->getVectorElementType(), vecTy->getVectorNumElements()); ArrayType *AT = ArrayType::get(vecTy->getVectorElementType(), vecTy->getVectorNumElements());
Constant *InitVal = ConstantArray::get(AT, Elts); Constant *InitVal = ConstantArray::get(AT, Elts);
unsigned AddressSpace = GV->getType()->getAddressSpace();
GlobalValue::LinkageTypes linkage = GV->getLinkage();
GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode();
GlobalVariable *arrayMat = new llvm::GlobalVariable( GlobalVariable *arrayMat = new llvm::GlobalVariable(
*M, AT, /*IsConstant*/ false, llvm::GlobalValue::InternalLinkage, *M, AT, /*IsConstant*/ false, linkage,
/*InitVal*/ InitVal, GV->getName()); /*InitVal*/ InitVal, GV->getName(),
/*InsertBefore*/nullptr,
TLMode, AddressSpace);
// Add debug info. // Add debug info.
if (m_HasDbgInfo) { if (m_HasDbgInfo) {
DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder(); DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder();
@ -2546,8 +2620,8 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) {
arrayMat); arrayMat);
} }
for (auto U = GV->user_begin(); U != GV->user_end();) { for (User *U :UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) {
Value *user = *(U++); Value *user = U;
CallInst *CI = cast<CallInst>(user); CallInst *CI = cast<CallInst>(user);
HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction());
if (group == HLOpcodeGroup::HLMatLoadStore) { if (group == HLOpcodeGroup::HLMatLoadStore) {

Просмотреть файл

@ -2442,8 +2442,8 @@ static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsi
unsigned ptrSize = DL.getTypeAllocSize(Ty); unsigned ptrSize = DL.getTypeAllocSize(Ty);
// Size match, return current level. // Size match, return current level.
if (ptrSize == size) { if (ptrSize == size) {
// Not go deeper for matrix. // Do not go deeper for matrix or object.
if (dxilutil::IsHLSLMatrixType(Ty)) if (dxilutil::IsHLSLMatrixType(Ty) || dxilutil::IsHLSLObjectType(Ty))
return level; return level;
// For struct, go deeper if size not change. // For struct, go deeper if size not change.
// This will leave memcpy to deeper level when flatten. // This will leave memcpy to deeper level when flatten.
@ -4186,9 +4186,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation,
/// MarkEmptyStructUsers - Add instruction related to Empty struct to DeadInsts. /// MarkEmptyStructUsers - Add instruction related to Empty struct to DeadInsts.
void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) { void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector<Value *, 32> &DeadInsts) {
for (User *U : V->users()) { UndefValue *undef = UndefValue::get(V->getType());
MarkEmptyStructUsers(U, DeadInsts); for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) {
Value *U = *(itU++);
// Kill memcpy, set operands to undef for call and ret, and recurse
if (MemCpyInst *MC = dyn_cast<MemCpyInst>(U)) {
DeadInsts.emplace_back(MC);
} else if (CallInst *CI = dyn_cast<CallInst>(U)) {
for (auto &operand : CI->operands()) {
if (operand == V)
operand.set(undef);
} }
} else if (ReturnInst *Ret = dyn_cast<ReturnInst>(U)) {
Ret->setOperand(0, undef);
} else if (isa<Constant>(U) || isa<GetElementPtrInst>(U) ||
isa<BitCastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) {
// Recurse users
MarkEmptyStructUsers(U, DeadInsts);
} else {
DXASSERT(false, "otherwise, recursing unexpected empty struct user");
}
}
if (Instruction *I = dyn_cast<Instruction>(V)) { if (Instruction *I = dyn_cast<Instruction>(V)) {
// Only need to add no use inst here. // Only need to add no use inst here.
// DeleteDeadInst will delete everything. // DeleteDeadInst will delete everything.

Просмотреть файл

@ -171,7 +171,8 @@ llvm::Value *CodeGenFunction::GetAddressOfBaseClass(
// Get the base pointer type. // Get the base pointer type.
llvm::Type *BasePtrTy = llvm::Type *BasePtrTy =
ConvertType((PathEnd[-1])->getType())->getPointerTo(); ConvertType((PathEnd[-1])->getType())->getPointerTo(
Value->getType()->getPointerAddressSpace()); // HLSL Change: match address space
QualType DerivedTy = getContext().getRecordType(Derived); QualType DerivedTy = getContext().getRecordType(Derived);
CharUnits DerivedAlign = getContext().getTypeAlignInChars(DerivedTy); CharUnits DerivedAlign = getContext().getTypeAlignInChars(DerivedTy);

Просмотреть файл

@ -1795,7 +1795,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg,
} }
LValue lv = MakeAddrLValue(DeclPtr, Ty, Align); LValue lv = MakeAddrLValue(DeclPtr, Ty, Align);
if (IsScalar) { if (!getLangOpts().HLSL && IsScalar) { // HLSL Change: not ObjC
Qualifiers qs = Ty.getQualifiers(); Qualifiers qs = Ty.getQualifiers();
if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) {
// We honor __attribute__((ns_consumed)) for types with lifetime. // We honor __attribute__((ns_consumed)) for types with lifetime.

Просмотреть файл

@ -3285,10 +3285,23 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
LValue LV = EmitLValue(E->getSubExpr()); LValue LV = EmitLValue(E->getSubExpr());
QualType ToType = getContext().getLValueReferenceType(E->getType()); QualType ToType = getContext().getLValueReferenceType(E->getType());
llvm::Value *FromValue = LV.getAddress();
llvm::Type *FromTy = FromValue->getType();
llvm::Type *RetTy = ConvertType(ToType); llvm::Type *RetTy = ConvertType(ToType);
// type not changed, LValueToRValue, CStyleCast may go this path // type not changed, LValueToRValue, CStyleCast may go this path
if (LV.getAddress()->getType() == RetTy) if (FromTy == RetTy) {
return LV; return LV;
// If only address space changed, add address space cast
}
if (FromTy->getPointerAddressSpace() != RetTy->getPointerAddressSpace()) {
llvm::Type *ConvertedFromTy = llvm::PointerType::get(
FromTy->getPointerElementType(), RetTy->getPointerAddressSpace());
assert(ConvertedFromTy == RetTy &&
"otherwise, more than just address space changing in one step");
llvm::Value *cast =
Builder.CreateAddrSpaceCast(FromValue, ConvertedFromTy);
return MakeAddrLValue(cast, ToType);
}
llvm::Value *cast = CGM.getHLSLRuntime().EmitHLSLMatrixOperationCall(*this, E, RetTy, { LV.getAddress() }); llvm::Value *cast = CGM.getHLSLRuntime().EmitHLSLMatrixOperationCall(*this, E, RetTy, { LV.getAddress() });
return MakeAddrLValue(cast, ToType); return MakeAddrLValue(cast, ToType);
} }

Просмотреть файл

@ -7010,9 +7010,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
const ParmVarDecl *Param = FD->getParamDecl(i); const ParmVarDecl *Param = FD->getParamDecl(i);
const Expr *Arg = E->getArg(i+ArgsToSkip); const Expr *Arg = E->getArg(i+ArgsToSkip);
QualType ParamTy = Param->getType().getNonReferenceType(); QualType ParamTy = Param->getType().getNonReferenceType();
bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy));
bool isAggregateType = !isObject &&
(ParamTy->isArrayType() || ParamTy->isRecordType()) &&
!hlsl::IsHLSLVecMatType(ParamTy);
bool EmitRValueAgg = false;
bool RValOnRef = false; bool RValOnRef = false;
if (!Param->isModifierOut()) { if (!Param->isModifierOut()) {
if (!ParamTy->isAggregateType() || hlsl::IsHLSLMatType(ParamTy)) { if (!isAggregateType && !isObject) {
if (Arg->isRValue() && Param->getType()->isReferenceType()) { if (Arg->isRValue() && Param->getType()->isReferenceType()) {
// RValue on a reference type. // RValue on a reference type.
if (const CStyleCastExpr *cCast = dyn_cast<CStyleCastExpr>(Arg)) { if (const CStyleCastExpr *cCast = dyn_cast<CStyleCastExpr>(Arg)) {
@ -7035,27 +7041,61 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
} else { } else {
continue; continue;
} }
} else if (isAggregateType) {
// aggregate in-only - emit RValue, unless LValueToRValue cast
EmitRValueAgg = true;
if (const ImplicitCastExpr *cast =
dyn_cast<ImplicitCastExpr>(Arg)) {
if (cast->getCastKind() == CastKind::CK_LValueToRValue) {
EmitRValueAgg = false;
} }
} }
} else {
// get original arg // Must be object
LValue argLV = CGF.EmitLValue(Arg); DXASSERT(isObject, "otherwise, flow condition changed, breaking assumption");
// in-only objects should be skipped to preserve previous behavior.
if (!Param->isModifierOut() && !RValOnRef) {
bool isDefaultAddrSpace = true;
if (argLV.isSimple()) {
isDefaultAddrSpace =
argLV.getAddress()->getType()->getPointerAddressSpace() ==
DXIL::kDefaultAddrSpace;
}
bool isHLSLIntrinsic = false;
if (const FunctionDecl *Callee = E->getDirectCallee()) {
isHLSLIntrinsic = Callee->hasAttr<HLSLIntrinsicAttr>();
}
// Copy in arg which is not default address space and not on hlsl intrinsic.
if (isDefaultAddrSpace || isHLSLIntrinsic)
continue; continue;
} }
}
// Skip unbounded array, since we cannot preserve copy-in copy-out
// semantics for these.
if (ParamTy->isIncompleteArrayType()) {
continue;
}
if (!Param->isModifierOut() && !RValOnRef) {
// No need to copy arg to in-only param for hlsl intrinsic.
if (const FunctionDecl *Callee = E->getDirectCallee()) {
if (Callee->hasAttr<HLSLIntrinsicAttr>())
continue;
}
}
// get original arg
// FIXME: This will not emit in correct argument order with the other
// arguments. This should be integrated into
// CodeGenFunction::EmitCallArg if possible.
RValue argRV; // emit this if aggregate arg on in-only param
LValue argLV; // otherwise, we may emit this
llvm::Value *argAddr = nullptr;
QualType argType = Arg->getType();
CharUnits argAlignment;
if (EmitRValueAgg) {
argRV = CGF.EmitAnyExprToTemp(Arg);
argAddr = argRV.getAggregateAddr(); // must be alloca
argAlignment = CharUnits::fromQuantity(cast<AllocaInst>(argAddr)->getAlignment());
argLV = LValue::MakeAddr(argAddr, ParamTy, argAlignment, CGF.getContext());
} else {
argLV = CGF.EmitLValue(Arg);
if (argLV.isSimple())
argAddr = argLV.getAddress();
argType = argLV.getType(); // TBD: Can this be different than Arg->getType()?
argAlignment = argLV.getAlignment();
}
// After emit Arg, we must update the argList[i],
// otherwise we get double emit of the expression.
// create temp Var // create temp Var
VarDecl *tmpArg = VarDecl *tmpArg =
@ -7065,17 +7105,26 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
CGF.getContext().getTrivialTypeSourceInfo(ParamTy), CGF.getContext().getTrivialTypeSourceInfo(ParamTy),
StorageClass::SC_Auto); StorageClass::SC_Auto);
bool isEmptyAggregate = false;
if (isAggregateType) {
DXASSERT(argAddr, "should be RV or simple LV");
llvm::Type *ElTy = argAddr->getType()->getPointerElementType();
while (ElTy->isArrayTy())
ElTy = ElTy->getArrayElementType();
if (llvm::StructType *ST = dyn_cast<StructType>(ElTy)) {
DxilStructAnnotation *SA = m_pHLModule->GetTypeSystem().GetStructAnnotation(ST);
isEmptyAggregate = SA && SA->IsEmptyStruct();
}
}
// Aggregate type will be indirect param convert to pointer type. // Aggregate type will be indirect param convert to pointer type.
// So don't update to ReferenceType, use RValue for it. // So don't update to ReferenceType, use RValue for it.
bool isAggregateType = (ParamTy->isArrayType() || ParamTy->isRecordType()) &&
!hlsl::IsHLSLVecMatType(ParamTy);
const DeclRefExpr *tmpRef = DeclRefExpr::Create( const DeclRefExpr *tmpRef = DeclRefExpr::Create(
CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), tmpArg, CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), tmpArg,
/*enclosing*/ false, tmpArg->getLocation(), ParamTy, /*enclosing*/ false, tmpArg->getLocation(), ParamTy,
isAggregateType ? VK_RValue : VK_LValue); (isAggregateType || isObject) ? VK_RValue : VK_LValue);
// update the arg // must update the arg, since we did emit Arg, else we get double emit.
argList[i] = tmpRef; argList[i] = tmpRef;
// create alloc for the tmp arg // create alloc for the tmp arg
@ -7090,7 +7139,12 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
// add it to local decl map // add it to local decl map
TmpArgMap(tmpArg, tmpArgAddr); TmpArgMap(tmpArg, tmpArgAddr);
LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argLV.getAlignment(), // If param is empty, copy in/out will just create problems.
// No copy will result in undef, which is fine.
if (isEmptyAggregate)
continue;
LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argAlignment,
CGF.getContext()); CGF.getContext());
// save for cast after call // save for cast after call
@ -7099,22 +7153,18 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
castArgList.emplace_back(argLV); castArgList.emplace_back(argLV);
} }
bool isObject = dxilutil::IsHLSLObjectType(
tmpArgAddr->getType()->getPointerElementType());
// cast before the call // cast before the call
if (Param->isModifierIn() && if (Param->isModifierIn() &&
// Don't copy object // Don't copy object
!isObject) { !isObject) {
QualType ArgTy = Arg->getType(); QualType ArgTy = Arg->getType();
Value *outVal = nullptr; Value *outVal = nullptr;
bool isAggregateTy = ParamTy->isAggregateType() && !IsHLSLVecMatType(ParamTy); if (!isAggregateType) {
if (!isAggregateTy) {
if (!IsHLSLMatType(ParamTy)) { if (!IsHLSLMatType(ParamTy)) {
RValue outRVal = CGF.EmitLoadOfLValue(argLV, SourceLocation()); RValue outRVal = CGF.EmitLoadOfLValue(argLV, SourceLocation());
outVal = outRVal.getScalarVal(); outVal = outRVal.getScalarVal();
} else { } else {
Value *argAddr = argLV.getAddress(); DXASSERT(argAddr, "should be RV or simple LV");
outVal = EmitHLSLMatrixLoad(CGF, argAddr, ArgTy); outVal = EmitHLSLMatrixLoad(CGF, argAddr, ArgTy);
} }
@ -7124,15 +7174,16 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
EmitHLSLMatrixStore(CGF, castVal, tmpArgAddr, ParamTy); EmitHLSLMatrixStore(CGF, castVal, tmpArgAddr, ParamTy);
} }
else { else {
Value *castVal = ConvertScalarOrVector(CGF, outVal, argLV.getType(), tmpLV.getType()); Value *castVal = ConvertScalarOrVector(CGF, outVal, argType, ParamTy);
castVal = CGF.EmitToMemory(castVal, tmpLV.getType()); castVal = CGF.EmitToMemory(castVal, ParamTy);
CGF.Builder.CreateStore(castVal, tmpArgAddr); CGF.Builder.CreateStore(castVal, tmpArgAddr);
} }
} else { } else {
DXASSERT(argAddr, "should be RV or simple LV");
SmallVector<Value *, 4> idxList; SmallVector<Value *, 4> idxList;
EmitHLSLAggregateCopy(CGF, argLV.getAddress(), tmpLV.getAddress(), EmitHLSLAggregateCopy(CGF, argAddr, tmpArgAddr,
idxList, ArgTy, ParamTy, idxList, ArgTy, ParamTy,
argLV.getAddress()->getType()); argAddr->getType());
} }
} }
} }

Просмотреть файл

@ -1811,11 +1811,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
// Make sure the result is of the correct type. // Make sure the result is of the correct type.
if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) { if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) {
// HLSL Change Begins
// TODO: do we put address space in type?
if (LangOpts.HLSL) return Entry;
else
// HLSL Change Ends
return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty);
} }
@ -1869,7 +1864,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
GV->setSection(".cp.rodata"); GV->setSection(".cp.rodata");
} }
if (AddrSpace != Ty->getAddressSpace() && !LangOpts.HLSL) // HLSL Change -TODO: do we put address space in type? if (AddrSpace != Ty->getAddressSpace())
return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty); return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);

Просмотреть файл

@ -10806,6 +10806,9 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A,
case AttributeList::AT_HLSLGroupShared: case AttributeList::AT_HLSLGroupShared:
declAttr = ::new (S.Context) HLSLGroupSharedAttr(A.getRange(), S.Context, declAttr = ::new (S.Context) HLSLGroupSharedAttr(A.getRange(), S.Context,
A.getAttributeSpellingListIndex()); A.getAttributeSpellingListIndex());
if (VarDecl *VD = dyn_cast<VarDecl>(D)) {
VD->setType(S.Context.getAddrSpaceQualType(VD->getType(), DXIL::kTGSMAddrSpace));
}
break; break;
case AttributeList::AT_HLSLUniform: case AttributeList::AT_HLSLUniform:
declAttr = ::new (S.Context) HLSLUniformAttr(A.getRange(), S.Context, declAttr = ::new (S.Context) HLSLUniformAttr(A.getRange(), S.Context,

Просмотреть файл

@ -0,0 +1,29 @@
// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s
// Make sure nested empty struct works. Also test related paths such as
// derived, multi-dim array in constant buffer, and argument passing.
// CHECK: main

// Empty struct nested two levels deep, then inherited from.
struct KillerStruct {};

struct InnerStruct {
  KillerStruct s;
};

struct OuterStruct {
  InnerStruct s;
};

class Derived : OuterStruct {
  InnerStruct s2;
};

// Multi-dimensional array of the (effectively empty) derived type in a
// constant buffer.
cbuffer Params_cbuffer : register(b0) {
  Derived constants[2][3];
};

// Passing an empty-struct argument must compile without crashing.
float4 foo(Derived s) { return (float4)0; }

float4 main() : SV_POSITION {
  return foo(constants[1][2]);
}

Просмотреть файл

@ -0,0 +1,50 @@
// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
// This tests cast of derived to base when derived is groupshared.
// Different use cases can hit different code paths, hence the variety of
// uses here:
// - calling base method
// - vector element assignment on base member
// - casting to base and passing to function
// The barrier and write to RWBuf prevents optimizations from eliminating
// groupshared use, considering this dead-code, or detecting a race condition.
// CHECK: @[[gs0:.+]] = addrspace(3) global i32 undef
// CHECK: @[[gs1:.+]] = addrspace(3) global i32 undef
// CHECK: @[[gs2:.+]] = addrspace(3) global i32 undef
// CHECK: store i32 1, i32 addrspace(3)* @[[gs0]], align 4
// CHECK: store i32 2, i32 addrspace(3)* @[[gs1]], align 4
// CHECK: store i32 3, i32 addrspace(3)* @[[gs2]], align 4
// CHECK: %[[l0:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs0]], align 4
// CHECK: %[[l1:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs1]], align 4
// CHECK: %[[l2:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs2]], align 4
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.+}}, i32 %{{.+}}, i32 undef, i32 %[[l0]], i32 %[[l1]], i32 %[[l2]], i32 undef, i8 7)

class Base {
  uint3 u;
  void set_u_y(uint value) { u.y = value; }
};

class Derived : Base {
  float bar;
};

groupshared Derived gs_derived;
RWByteAddressBuffer RWBuf;

// Takes the base class by inout, so the caller must cast derived to base.
void UpdateBase_z(inout Base b, uint value) {
  b.u.z = value;
}

[numthreads(2, 1, 1)]
void main(uint3 groupThreadID: SV_GroupThreadID) {
  if (groupThreadID.x == 0) {
    // Direct member write on the base subobject.
    gs_derived.u.x = 1;
    // Base method call on a groupshared derived object.
    gs_derived.set_u_y(2);
    // Explicit cast to base passed as an inout argument.
    UpdateBase_z((Base)gs_derived, 3);
  }
  GroupMemoryBarrierWithGroupSync();
  uint addr = groupThreadID.x * 4;
  RWBuf.Store3(addr, gs_derived.u);
}

Просмотреть файл

@ -0,0 +1,49 @@
// RUN: %dxc -E main -T cs_6_0 -Zpc %s | FileCheck %s
// Tests a groupshared object containing a matrix member that is written
// through a class method using inout row arguments, compiled with -Zpc.
// The CHECKs verify the matrix lowers to a flat [16 x float] groupshared
// array and that loads/stores use addrspace(3).
// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
// CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1)
// CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0
// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4
// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)

float4 rows[4];

// Writes a row via an inout parameter; exercises copy-in/copy-out on a
// groupshared matrix row.
void set_row(inout float4 row, uint i) {
  row = rows[i];
}

class Obj {
  float4x4 mat;
  // Member method writing the matrix of a groupshared instance.
  void set() {
    set_row(mat[0], 0);
    set_row(mat[1], 1);
    set_row(mat[2], 2);
    set_row(mat[3], 3);
  }
};

RWByteAddressBuffer RWBuf;
groupshared Obj obj;

[numthreads(4, 1, 1)]
void main(uint3 groupThreadID: SV_GroupThreadID) {
  if (groupThreadID.x == 0) {
    obj.set();
  }
  GroupMemoryBarrierWithGroupSync();
  float4 row = obj.mat[groupThreadID.x];
  uint addr = groupThreadID.x * 4;
  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
}

Просмотреть файл

@ -0,0 +1,48 @@
// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s
// Same scenario as the -Zpc variant, but compiled with -Zpr: a groupshared
// object containing a matrix member written through a class method using
// inout row arguments.  The CHECKs verify the matrix lowers to a flat
// [16 x float] groupshared array with addrspace(3) loads/stores.
// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0)
// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0
// CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1
// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16
// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4
// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16
// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}}
// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4
// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32
// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32
// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15)

float4 rows[4];

// Writes a row via an inout parameter; exercises copy-in/copy-out on a
// groupshared matrix row.
void set_row(inout float4 row, uint i) {
  row = rows[i];
}

class Obj {
  float4x4 mat;
  // Member method writing the matrix of a groupshared instance.
  void set() {
    set_row(mat[0], 0);
    set_row(mat[1], 1);
    set_row(mat[2], 2);
    set_row(mat[3], 3);
  }
};

RWByteAddressBuffer RWBuf;
groupshared Obj obj;

[numthreads(4, 1, 1)]
void main(uint3 groupThreadID: SV_GroupThreadID) {
  if (groupThreadID.x == 0) {
    obj.set();
  }
  GroupMemoryBarrierWithGroupSync();
  float4 row = obj.mat[groupThreadID.x];
  uint addr = groupThreadID.x * 4;
  RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w)));
}

Просмотреть файл

@ -17,7 +17,7 @@ A a;
static A a2; static A a2;
void set(A aa) { void set(out A aa) {
aa = a; aa = a;
} }

Просмотреть файл

@ -0,0 +1,28 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Verify that modification of in-only struct parameter does not modify
// the value passed in by the caller.
// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
// CHECK-DAG: [[o1:%[^ ]*]] = fmul fast float [[p]], [[f]]
// CHECK-DAG: [[ret:%[^ ]*]] = fadd fast float [[o1]], [[p]]
// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])

struct PayloadStruct {
  float Color;
};

// Mutates its in-only parameter; the caller's copy must stay unchanged.
PayloadStruct MulPayload(in PayloadStruct Payload, in float x)
{
  Payload.Color *= x;
  return Payload;
}

void main(PayloadStruct p : Payload,
          float f : INPUT,
          out PayloadStruct o : SV_Target) {
  o = MulPayload(p, f);
  // p.Color must still hold the original input value here.
  o.Color += p.Color;
}

Просмотреть файл

@ -0,0 +1,38 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Verify that passing struct result of call as arg to another call does not
// generate extra call.
// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0,
// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0,
// CHECK-DAG: [[factor:%[^ ]*]] = fmul fast float [[f]], 2.000000e+00
// CHECK-DAG: [[factor2:%[^ ]*]] = fadd fast float [[factor]], 1.000000e+00
// CHECK-DAG: [[ret:%[^ ]*]] = fmul fast float [[factor2]], [[p]]
// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]])

struct PayloadStruct {
  float Color;
};

// Static state makes an extra (duplicated) call observable: each call to
// MulPayload bumps factor, so the CHECKs above pin exactly two calls' worth
// of arithmetic.
static float factor = 1.0;

PayloadStruct MulPayload(in PayloadStruct Payload)
{
  Payload.Color *= factor;
  factor += 1.0;
  return Payload;
}

PayloadStruct AddPayload(in PayloadStruct Payload0, in PayloadStruct Payload1)
{
  Payload0.Color += Payload1.Color;
  return Payload0;
}

void main(PayloadStruct p : Payload,
          float f : INPUT,
          out PayloadStruct OutputPayload : SV_Target) {
  factor = f;
  // Call results used directly as arguments to another call.
  OutputPayload = AddPayload(MulPayload(p),
                             MulPayload(p));
}

Просмотреть файл

@ -1,7 +1,8 @@
// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
// Make sure function call on external function has correct type. // Make sure function call on external function has correct type.
// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* nonnull {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}}) // CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}})
struct T { struct T {
float a; float a;
float b; float b;

Просмотреть файл

@ -2,7 +2,7 @@
// Make sure function call on external function has correct type. // Make sure function call on external function has correct type.
// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull %tmp) #2 // CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull {{.*}}) #2
struct T { struct T {
float a; float a;

Просмотреть файл

@ -1,7 +1,7 @@
// RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s
// Make sure empty struct arg works. // Make sure empty struct arg is replaced with undef.
// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* %t) // CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* undef)
struct T { struct T {
}; };