From b4bb5795cdefcb1fbdc8057b4f2453a8e5be4d2e Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 19 Feb 2019 18:37:27 -0800 Subject: [PATCH] Fix some argument passing and groupshared bad codegen and crashes - set address space for groupshared QualType and fix downstream effects Fix: - double LValue expression emit for in aggregate arguments - in agg param modifying caller's value instead of copy - groupshared matrix support in HLMatrixLower - groupshared base class member access - groupshared matrix member casting in class method Still in need of more fixes and tests: - incomplete array and auto dimensions from initializer Argument handling still needs overhaul. This fix retains old behavior, even when not quite correct to avoid worse regressions. Objects are not copied in, aggregate LValueToRValue cast expr is emitted as LValues instead of RValues because RValue emit path doesn't have HLSL changes necessary to handle certain cases, such as derived-to-base cast. --- include/dxc/DXIL/DxilUtil.h | 1 + lib/DXIL/DxilUtil.cpp | 72 +++++++----- lib/HLSL/HLMatrixLowerPass.cpp | 88 ++++++++++++-- .../Scalar/ScalarReplAggregatesHLSL.cpp | 27 ++++- tools/clang/lib/CodeGen/CGClass.cpp | 3 +- tools/clang/lib/CodeGen/CGDecl.cpp | 2 +- tools/clang/lib/CodeGen/CGExpr.cpp | 15 ++- tools/clang/lib/CodeGen/CGHLSLMS.cpp | 111 +++++++++++++----- tools/clang/lib/CodeGen/CodeGenModule.cpp | 7 +- tools/clang/lib/Sema/SemaHLSL.cpp | 3 + .../CodeGenHLSL/quick-test/empty_struct3.hlsl | 29 +++++ .../quick-test/groupshared-base-cast.hlsl | 50 ++++++++ ...oupshared-member-matrix-subscript-col.hlsl | 49 ++++++++ .../groupshared-member-matrix-subscript.hlsl | 48 ++++++++ .../quick-test/static_global_copy3.hlsl | 2 +- .../quick-test/struct_param_in_mod.hlsl | 28 +++++ .../quick-test/struct_param_in_mod2.hlsl | 38 ++++++ .../lib_arg_flatten/lib_arg_flatten.hlsl | 3 +- .../lib_arg_flatten/lib_arg_flatten3.hlsl | 2 +- .../lib_arg_flatten/lib_empty_struct_arg.hlsl | 4 +- 20 files changed, 499 insertions(+), 83 deletions(-) create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/empty_struct3.hlsl create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/groupshared-base-cast.hlsl create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod.hlsl create mode 100644 tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod2.hlsl diff --git a/include/dxc/DXIL/DxilUtil.h b/include/dxc/DXIL/DxilUtil.h index c17762d08..2087bb334 100644 --- a/include/dxc/DXIL/DxilUtil.h +++ b/include/dxc/DXIL/DxilUtil.h @@ -104,6 +104,7 @@ namespace dxilutil { void PrintDiagnosticHandler(const llvm::DiagnosticInfo &DI, void *Context); // Returns true if type contains HLSL Object type (resource) bool ContainsHLSLObjectType(llvm::Type *Ty); + bool IsHLSLResourceType(llvm::Type *Ty); bool IsHLSLObjectType(llvm::Type *Ty); bool IsHLSLMatrixType(llvm::Type *Ty); bool IsSplat(llvm::ConstantDataVector *cdv); diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index f36f4db5e..b95795ff2 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -396,16 +396,9 @@ llvm::Instruction *FirstNonAllocaInsertionPt(llvm::Function* F) { return SkipAllocas(FindAllocaInsertionPt(F)); } -bool IsHLSLObjectType(llvm::Type *Ty) { +bool IsHLSLResourceType(llvm::Type *Ty) { if (llvm::StructType *ST = dyn_cast(Ty)) { StringRef name = ST->getName(); - // TODO: don't check names. - if (name.startswith("dx.types.wave_t")) - return true; - - if (name.endswith("_slice_type")) - return false; - name = name.ltrim("class."); name = name.ltrim("struct."); @@ -414,13 +407,6 @@ bool IsHLSLObjectType(llvm::Type *Ty) { if (name == "SamplerComparisonState") return true; - if (name.startswith("TriangleStream<")) - return true; - if (name.startswith("PointStream<")) - return true; - if (name.startswith("LineStream<")) - return true; - if (name.startswith("AppendStructuredBuffer<")) return true; if (name.startswith("ConsumeStructuredBuffer<")) @@ -441,23 +427,53 @@ bool IsHLSLObjectType(llvm::Type *Ty) { return true; if (name.startswith("StructuredBuffer<")) return true; - if (name.startswith("Texture1D<")) + + if (name.startswith("Texture")) { + name = name.ltrim("Texture"); + if (name.startswith("1D<")) + return true; + if (name.startswith("1DArray<")) + return true; + if (name.startswith("2D<")) + return true; + if (name.startswith("2DArray<")) + return true; + if (name.startswith("3D<")) + return true; + if (name.startswith("Cube<")) + return true; + if (name.startswith("CubeArray<")) + return true; + if (name.startswith("2DMS<")) + return true; + if (name.startswith("2DMSArray<")) + return true; + } + } + return false; +} + +bool IsHLSLObjectType(llvm::Type *Ty) { + if (llvm::StructType *ST = dyn_cast(Ty)) { + StringRef name = ST->getName(); + // TODO: don't check names. + if (name.startswith("dx.types.wave_t")) return true; - if (name.startswith("Texture1DArray<")) + + if (name.endswith("_slice_type")) + return false; + + if (IsHLSLResourceType(Ty)) return true; - if (name.startswith("Texture2D<")) + + name = name.ltrim("class."); + name = name.ltrim("struct."); + + if (name.startswith("TriangleStream<")) return true; - if (name.startswith("Texture2DArray<")) + if (name.startswith("PointStream<")) return true; - if (name.startswith("Texture3D<")) - return true; - if (name.startswith("TextureCube<")) - return true; - if (name.startswith("TextureCubeArray<")) - return true; - if (name.startswith("Texture2DMS<")) - return true; - if (name.startswith("Texture2DMSArray<")) + if (name.startswith("LineStream<")) return true; } return false; diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index a5a7e474b..ce367c6b9 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -2330,9 +2330,78 @@ void HLMatrixLowerPass::DeleteDeadInsts() { m_inDeadInstsSet.clear(); } +// Iterate users, tunnel through address space cast, and skip unused constant +// users. +class UserIter_TunnelAddrSpace_SkipUnusedConstantUser { +private: + Value::user_iterator UserIt; + Value::user_iterator AddrSpaceCastUserIt; + +public: + User *U = nullptr; +public: + UserIter_TunnelAddrSpace_SkipUnusedConstantUser() {} + UserIter_TunnelAddrSpace_SkipUnusedConstantUser(Value *V) + { + if (!V->user_empty()) + UserIt = V->user_begin(); + Next(); + } + UserIter_TunnelAddrSpace_SkipUnusedConstantUser( + const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) = default; + + User *Next() { + U = nullptr; + if (!AddrSpaceCastUserIt.atEnd()) { + U = *(AddrSpaceCastUserIt++); + return U; + } + while (!UserIt.atEnd()) { + U = *(UserIt++); + if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->user_empty()) + continue; + if (CE->getOpcode() == Instruction::AddrSpaceCast) { + AddrSpaceCastUserIt = CE->user_begin(); + U = *(AddrSpaceCastUserIt++); + return U; + } + } + return U; + } + U = nullptr; + return U; + } + + UserIter_TunnelAddrSpace_SkipUnusedConstantUser begin() { return *this; } + UserIter_TunnelAddrSpace_SkipUnusedConstantUser end() { + return UserIter_TunnelAddrSpace_SkipUnusedConstantUser(); + } + bool + operator==(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) { + return (U == other.U && UserIt == other.UserIt && + AddrSpaceCastUserIt == other.AddrSpaceCastUserIt); + } + bool + operator!=(const UserIter_TunnelAddrSpace_SkipUnusedConstantUser &other) { + return !(*this == (other)); + } + UserIter_TunnelAddrSpace_SkipUnusedConstantUser &operator++() { + Next(); + return *this; + } + UserIter_TunnelAddrSpace_SkipUnusedConstantUser operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + User *operator*() { return U; } + User *operator->() { return U; } +}; + static bool OnlyUsedByMatrixLdSt(Value *V) { bool onlyLdSt = true; - for (User *user : V->users()) { + for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(V)) { if (isa(user) && user->use_empty()) continue; @@ -2417,7 +2486,7 @@ void HLMatrixLowerPass::runOnGlobalMatrixArray(GlobalVariable *GV) { HLModule::UpdateGlobalVariableDebugInfo(GV, Finder, VecGV); } - for (User *U : GV->users()) { + for (User *U : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) { Value *VecGEP = nullptr; // Must be GEP or GEPOperator. if (GetElementPtrInst *GEP = dyn_cast(U)) { @@ -2522,7 +2591,7 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) { } vecGlobals[i] = EltGV; } - for (User *user : GV->users()) { + for (User *user : UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) { if (isa(user) && user->use_empty()) continue; CallInst *CI = cast(user); @@ -2536,9 +2605,14 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) { // lower to array of scalar here. ArrayType *AT = ArrayType::get(vecTy->getVectorElementType(), vecTy->getVectorNumElements()); Constant *InitVal = ConstantArray::get(AT, Elts); + unsigned AddressSpace = GV->getType()->getAddressSpace(); + GlobalValue::LinkageTypes linkage = GV->getLinkage(); + GlobalVariable::ThreadLocalMode TLMode = GV->getThreadLocalMode(); GlobalVariable *arrayMat = new llvm::GlobalVariable( - *M, AT, /*IsConstant*/ false, llvm::GlobalValue::InternalLinkage, - /*InitVal*/ InitVal, GV->getName()); + *M, AT, /*IsConstant*/ false, linkage, + /*InitVal*/ InitVal, GV->getName(), + /*InsertBefore*/nullptr, + TLMode, AddressSpace); // Add debug info. if (m_HasDbgInfo) { DebugInfoFinder &Finder = m_pHLModule->GetOrCreateDebugInfoFinder(); @@ -2546,8 +2620,8 @@ void HLMatrixLowerPass::runOnGlobal(GlobalVariable *GV) { arrayMat); } - for (auto U = GV->user_begin(); U != GV->user_end();) { - Value *user = *(U++); + for (User *U :UserIter_TunnelAddrSpace_SkipUnusedConstantUser(GV)) { + Value *user = U; CallInst *CI = cast(user); HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); if (group == HLOpcodeGroup::HLMatLoadStore) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 0d43dbc2c..3f47af8a2 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2442,8 +2442,8 @@ static unsigned MatchSizeByCheckElementType(Type *Ty, const DataLayout &DL, unsi unsigned ptrSize = DL.getTypeAllocSize(Ty); // Size match, return current level. if (ptrSize == size) { - // Not go deeper for matrix. - if (dxilutil::IsHLSLMatrixType(Ty)) + // Do not go deeper for matrix or object. + if (dxilutil::IsHLSLMatrixType(Ty) || dxilutil::IsHLSLObjectType(Ty)) return level; // For struct, go deeper if size not change. // This will leave memcpy to deeper level when flatten. @@ -4186,9 +4186,28 @@ bool SROA_Helper::LowerMemcpy(Value *V, DxilFieldAnnotation *annotation, /// MarkEmptyStructUsers - Add instruction related to Empty struct to DeadInsts. void SROA_Helper::MarkEmptyStructUsers(Value *V, SmallVector &DeadInsts) { - for (User *U : V->users()) { - MarkEmptyStructUsers(U, DeadInsts); + UndefValue *undef = UndefValue::get(V->getType()); + for (auto itU = V->user_begin(), E = V->user_end(); itU != E;) { + Value *U = *(itU++); + // Kill memcpy, set operands to undef for call and ret, and recurse + if (MemCpyInst *MC = dyn_cast(U)) { + DeadInsts.emplace_back(MC); + } else if (CallInst *CI = dyn_cast(U)) { + for (auto &operand : CI->operands()) { + if (operand == V) + operand.set(undef); + } + } else if (ReturnInst *Ret = dyn_cast(U)) { + Ret->setOperand(0, undef); + } else if (isa(U) || isa(U) || + isa(U) || isa(U) || isa(U)) { + // Recurse users + MarkEmptyStructUsers(U, DeadInsts); + } else { + DXASSERT(false, "otherwise, recursing unexpected empty struct user"); + } } + if (Instruction *I = dyn_cast(V)) { // Only need to add no use inst here. // DeleteDeadInst will delete everything. diff --git a/tools/clang/lib/CodeGen/CGClass.cpp b/tools/clang/lib/CodeGen/CGClass.cpp index 96a02ac42..576dc281c 100644 --- a/tools/clang/lib/CodeGen/CGClass.cpp +++ b/tools/clang/lib/CodeGen/CGClass.cpp @@ -171,7 +171,8 @@ llvm::Value *CodeGenFunction::GetAddressOfBaseClass( // Get the base pointer type. llvm::Type *BasePtrTy = - ConvertType((PathEnd[-1])->getType())->getPointerTo(); + ConvertType((PathEnd[-1])->getType())->getPointerTo( + Value->getType()->getPointerAddressSpace()); // HLSL Change: match address space QualType DerivedTy = getContext().getRecordType(Derived); CharUnits DerivedAlign = getContext().getTypeAlignInChars(DerivedTy); diff --git a/tools/clang/lib/CodeGen/CGDecl.cpp b/tools/clang/lib/CodeGen/CGDecl.cpp index e9096be80..4fbe348ce 100644 --- a/tools/clang/lib/CodeGen/CGDecl.cpp +++ b/tools/clang/lib/CodeGen/CGDecl.cpp @@ -1795,7 +1795,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, } LValue lv = MakeAddrLValue(DeclPtr, Ty, Align); - if (IsScalar) { + if (!getLangOpts().HLSL && IsScalar) { // HLSL Change: not ObjC Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { // We honor __attribute__((ns_consumed)) for types with lifetime. diff --git a/tools/clang/lib/CodeGen/CGExpr.cpp b/tools/clang/lib/CodeGen/CGExpr.cpp index 5b612bb0e..52e4c8b0c 100644 --- a/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/tools/clang/lib/CodeGen/CGExpr.cpp @@ -3285,10 +3285,23 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { LValue LV = EmitLValue(E->getSubExpr()); QualType ToType = getContext().getLValueReferenceType(E->getType()); + llvm::Value *FromValue = LV.getAddress(); + llvm::Type *FromTy = FromValue->getType(); llvm::Type *RetTy = ConvertType(ToType); // type not changed, LValueToRValue, CStyleCast may go this path - if (LV.getAddress()->getType() == RetTy) + if (FromTy == RetTy) { return LV; + // If only address space changed, add address space cast + } + if (FromTy->getPointerAddressSpace() != RetTy->getPointerAddressSpace()) { + llvm::Type *ConvertedFromTy = llvm::PointerType::get( + FromTy->getPointerElementType(), RetTy->getPointerAddressSpace()); + assert(ConvertedFromTy == RetTy && + "otherwise, more than just address space changing in one step"); + llvm::Value *cast = + Builder.CreateAddrSpaceCast(FromValue, ConvertedFromTy); + return MakeAddrLValue(cast, ToType); + } llvm::Value *cast = CGM.getHLSLRuntime().EmitHLSLMatrixOperationCall(*this, E, RetTy, { LV.getAddress() }); return MakeAddrLValue(cast, ToType); } diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 4250d3968..76f3d2a85 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -7010,9 +7010,15 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( const ParmVarDecl *Param = FD->getParamDecl(i); const Expr *Arg = E->getArg(i+ArgsToSkip); QualType ParamTy = Param->getType().getNonReferenceType(); + bool isObject = dxilutil::IsHLSLObjectType(CGF.ConvertTypeForMem(ParamTy)); + bool isAggregateType = !isObject && + (ParamTy->isArrayType() || ParamTy->isRecordType()) && + !hlsl::IsHLSLVecMatType(ParamTy); + + bool EmitRValueAgg = false; bool RValOnRef = false; if (!Param->isModifierOut()) { - if (!ParamTy->isAggregateType() || hlsl::IsHLSLMatType(ParamTy)) { + if (!isAggregateType && !isObject) { if (Arg->isRValue() && Param->getType()->isReferenceType()) { // RValue on a reference type. if (const CStyleCastExpr *cCast = dyn_cast(Arg)) { @@ -7035,28 +7041,62 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( } else { continue; } + } else if (isAggregateType) { + // aggregate in-only - emit RValue, unless LValueToRValue cast + EmitRValueAgg = true; + if (const ImplicitCastExpr *cast = + dyn_cast(Arg)) { + if (cast->getCastKind() == CastKind::CK_LValueToRValue) { + EmitRValueAgg = false; + } + } + } else { + // Must be object + DXASSERT(isObject, "otherwise, flow condition changed, breaking assumption"); + // in-only objects should be skipped to preserve previous behavior. + continue; } } - // get original arg - LValue argLV = CGF.EmitLValue(Arg); + // Skip unbounded array, since we cannot preserve copy-in copy-out + // semantics for these. + if (ParamTy->isIncompleteArrayType()) { + continue; + } if (!Param->isModifierOut() && !RValOnRef) { - bool isDefaultAddrSpace = true; - if (argLV.isSimple()) { - isDefaultAddrSpace = - argLV.getAddress()->getType()->getPointerAddressSpace() == - DXIL::kDefaultAddrSpace; - } - bool isHLSLIntrinsic = false; + // No need to copy arg to in-only param for hlsl intrinsic. if (const FunctionDecl *Callee = E->getDirectCallee()) { - isHLSLIntrinsic = Callee->hasAttr(); + if (Callee->hasAttr()) + continue; } - // Copy in arg which is not default address space and not on hlsl intrinsic. - if (isDefaultAddrSpace || isHLSLIntrinsic) - continue; } + + // get original arg + // FIXME: This will not emit in correct argument order with the other + // arguments. This should be integrated into + // CodeGenFunction::EmitCallArg if possible. + RValue argRV; // emit this if aggregate arg on in-only param + LValue argLV; // otherwise, we may emit this + llvm::Value *argAddr = nullptr; + QualType argType = Arg->getType(); + CharUnits argAlignment; + if (EmitRValueAgg) { + argRV = CGF.EmitAnyExprToTemp(Arg); + argAddr = argRV.getAggregateAddr(); // must be alloca + argAlignment = CharUnits::fromQuantity(cast(argAddr)->getAlignment()); + argLV = LValue::MakeAddr(argAddr, ParamTy, argAlignment, CGF.getContext()); + } else { + argLV = CGF.EmitLValue(Arg); + if (argLV.isSimple()) + argAddr = argLV.getAddress(); + argType = argLV.getType(); // TBD: Can this be different than Arg->getType()? + argAlignment = argLV.getAlignment(); + } + // After emit Arg, we must update the argList[i], + // otherwise we get double emit of the expression. + // create temp Var VarDecl *tmpArg = VarDecl::Create(CGF.getContext(), const_cast(FD), @@ -7065,17 +7105,26 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( CGF.getContext().getTrivialTypeSourceInfo(ParamTy), StorageClass::SC_Auto); + bool isEmptyAggregate = false; + if (isAggregateType) { + DXASSERT(argAddr, "should be RV or simple LV"); + llvm::Type *ElTy = argAddr->getType()->getPointerElementType(); + while (ElTy->isArrayTy()) + ElTy = ElTy->getArrayElementType(); + if (llvm::StructType *ST = dyn_cast(ElTy)) { + DxilStructAnnotation *SA = m_pHLModule->GetTypeSystem().GetStructAnnotation(ST); + isEmptyAggregate = SA && SA->IsEmptyStruct(); + } + } + // Aggregate type will be indirect param convert to pointer type. // So don't update to ReferenceType, use RValue for it. - bool isAggregateType = (ParamTy->isArrayType() || ParamTy->isRecordType()) && - !hlsl::IsHLSLVecMatType(ParamTy); - const DeclRefExpr *tmpRef = DeclRefExpr::Create( CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), tmpArg, /*enclosing*/ false, tmpArg->getLocation(), ParamTy, - isAggregateType ? VK_RValue : VK_LValue); + (isAggregateType || isObject) ? VK_RValue : VK_LValue); - // update the arg + // must update the arg, since we did emit Arg, else we get double emit. argList[i] = tmpRef; // create alloc for the tmp arg @@ -7090,7 +7139,12 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( // add it to local decl map TmpArgMap(tmpArg, tmpArgAddr); - LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argLV.getAlignment(), + // If param is empty, copy in/out will just create problems. + // No copy will result in undef, which is fine. + if (isEmptyAggregate) + continue; + + LValue tmpLV = LValue::MakeAddr(tmpArgAddr, ParamTy, argAlignment, CGF.getContext()); // save for cast after call @@ -7099,22 +7153,18 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( castArgList.emplace_back(argLV); } - bool isObject = dxilutil::IsHLSLObjectType( - tmpArgAddr->getType()->getPointerElementType()); - // cast before the call if (Param->isModifierIn() && // Don't copy object !isObject) { QualType ArgTy = Arg->getType(); Value *outVal = nullptr; - bool isAggregateTy = ParamTy->isAggregateType() && !IsHLSLVecMatType(ParamTy); - if (!isAggregateTy) { + if (!isAggregateType) { if (!IsHLSLMatType(ParamTy)) { RValue outRVal = CGF.EmitLoadOfLValue(argLV, SourceLocation()); outVal = outRVal.getScalarVal(); } else { - Value *argAddr = argLV.getAddress(); + DXASSERT(argAddr, "should be RV or simple LV"); outVal = EmitHLSLMatrixLoad(CGF, argAddr, ArgTy); } @@ -7124,15 +7174,16 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( EmitHLSLMatrixStore(CGF, castVal, tmpArgAddr, ParamTy); } else { - Value *castVal = ConvertScalarOrVector(CGF, outVal, argLV.getType(), tmpLV.getType()); - castVal = CGF.EmitToMemory(castVal, tmpLV.getType()); + Value *castVal = ConvertScalarOrVector(CGF, outVal, argType, ParamTy); + castVal = CGF.EmitToMemory(castVal, ParamTy); CGF.Builder.CreateStore(castVal, tmpArgAddr); } } else { + DXASSERT(argAddr, "should be RV or simple LV"); SmallVector idxList; - EmitHLSLAggregateCopy(CGF, argLV.getAddress(), tmpLV.getAddress(), + EmitHLSLAggregateCopy(CGF, argAddr, tmpArgAddr, idxList, ArgTy, ParamTy, - argLV.getAddress()->getType()); + argAddr->getType()); } } } diff --git a/tools/clang/lib/CodeGen/CodeGenModule.cpp b/tools/clang/lib/CodeGen/CodeGenModule.cpp index 6ce712b1a..8773f3024 100644 --- a/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -1811,11 +1811,6 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) { - // HLSL Change Begins - // TODO: do we put address space in type? - if (LangOpts.HLSL) return Entry; - else - // HLSL Change Ends return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); } @@ -1869,7 +1864,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setSection(".cp.rodata"); } - if (AddrSpace != Ty->getAddressSpace() && !LangOpts.HLSL) // HLSL Change -TODO: do we put address space in type? + if (AddrSpace != Ty->getAddressSpace()) return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 3ef13faf7..58fe99470 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -10806,6 +10806,9 @@ void hlsl::HandleDeclAttributeForHLSL(Sema &S, Decl *D, const AttributeList &A, case AttributeList::AT_HLSLGroupShared: declAttr = ::new (S.Context) HLSLGroupSharedAttr(A.getRange(), S.Context, A.getAttributeSpellingListIndex()); + if (VarDecl *VD = dyn_cast(D)) { + VD->setType(S.Context.getAddrSpaceQualType(VD->getType(), DXIL::kTGSMAddrSpace)); + } break; case AttributeList::AT_HLSLUniform: declAttr = ::new (S.Context) HLSLUniformAttr(A.getRange(), S.Context, diff --git a/tools/clang/test/CodeGenHLSL/quick-test/empty_struct3.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/empty_struct3.hlsl new file mode 100644 index 000000000..349769823 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/empty_struct3.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -E main -T vs_6_0 %s | FileCheck %s + +// Make sure nested empty struct works. Also test related paths such as +// derived, multi-dim array in constant buffer, and argument passing. +// CHECK: main + +struct KillerStruct {}; + +struct InnerStruct { + KillerStruct s; +}; + +struct OuterStruct { + InnerStruct s; +}; + +class Derived : OuterStruct { + InnerStruct s2; +}; + +cbuffer Params_cbuffer : register(b0) { + Derived constants[2][3]; +}; + +float4 foo(Derived s) { return (float4)0; } + +float4 main() : SV_POSITION { + return foo(constants[1][2]); +} diff --git a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-base-cast.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-base-cast.hlsl new file mode 100644 index 000000000..189d24600 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-base-cast.hlsl @@ -0,0 +1,50 @@ +// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s + +// This tests cast of derived to base when derived is groupshared. +// Different use cases can hit different code paths, hence the variety of +// uses here: +// - calling base method +// - vector element assignment on base member +// - casting to base and passing to function +// The barrier and write to RWBuf prevents optimizations from eliminating +// groupshared use, considering this dead-code, or detecting a race condition. + +// CHECK: @[[gs0:.+]] = addrspace(3) global i32 undef +// CHECK: @[[gs1:.+]] = addrspace(3) global i32 undef +// CHECK: @[[gs2:.+]] = addrspace(3) global i32 undef +// CHECK: store i32 1, i32 addrspace(3)* @[[gs0]], align 4 +// CHECK: store i32 2, i32 addrspace(3)* @[[gs1]], align 4 +// CHECK: store i32 3, i32 addrspace(3)* @[[gs2]], align 4 + +// CHECK: %[[l0:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs0]], align 4 +// CHECK: %[[l1:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs1]], align 4 +// CHECK: %[[l2:[^ ]+]] = load i32, i32 addrspace(3)* @[[gs2]], align 4 +// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.+}}, i32 %{{.+}}, i32 undef, i32 %[[l0]], i32 %[[l1]], i32 %[[l2]], i32 undef, i8 7) + + +class Base { + uint3 u; + void set_u_y(uint value) { u.y = value; } +}; +class Derived : Base { + float bar; +}; + +groupshared Derived gs_derived; +RWByteAddressBuffer RWBuf; + +void UpdateBase_z(inout Base b, uint value) { + b.u.z = value; +} + +[numthreads(2, 1, 1)] +void main(uint3 groupThreadID: SV_GroupThreadID) { + if (groupThreadID.x == 0) { + gs_derived.u.x = 1; + gs_derived.set_u_y(2); + UpdateBase_z((Base)gs_derived, 3); + } + GroupMemoryBarrierWithGroupSync(); + uint addr = groupThreadID.x * 4; + RWBuf.Store3(addr, gs_derived.u); +} diff --git a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl new file mode 100644 index 000000000..b6eb5535a --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript-col.hlsl @@ -0,0 +1,49 @@ +// RUN: %dxc -E main -T cs_6_0 -Zpc %s | FileCheck %s + +// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0) +// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0 +// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16 + +// CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 1) +// CHECK: %[[cb1x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0 +// CHECK: store float %[[cb1x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4 + +// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}} +// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 4 +// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}} +// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4 + +// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32 +// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32 + +// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{[^,]+}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15) + +float4 rows[4]; + +void set_row(inout float4 row, uint i) { + row = rows[i]; +} + +class Obj { + float4x4 mat; + void set() { + set_row(mat[0], 0); + set_row(mat[1], 1); + set_row(mat[2], 2); + set_row(mat[3], 3); + } +}; + +RWByteAddressBuffer RWBuf; +groupshared Obj obj; + +[numthreads(4, 1, 1)] +void main(uint3 groupThreadID: SV_GroupThreadID) { + if (groupThreadID.x == 0) { + obj.set(); + } + GroupMemoryBarrierWithGroupSync(); + float4 row = obj.mat[groupThreadID.x]; + uint addr = groupThreadID.x * 4; + RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w))); +} diff --git a/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl new file mode 100644 index 000000000..7380a69c4 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/groupshared-member-matrix-subscript.hlsl @@ -0,0 +1,48 @@ +// RUN: %dxc -E main -T cs_6_0 -Zpr %s | FileCheck %s + +// CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %{{.*}}, i32 0) +// CHECK: %[[cb0x:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0 +// CHECK: %[[cb0y:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1 + +// CHECK: store float %[[cb0x]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj:[^,]+]], i32 0, i32 0), align 16 +// CHECK: store float %[[cb0y]], float addrspace(3)* getelementptr inbounds ([16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 1), align 4 + +// CHECK: %[[_25:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}} +// CHECK: %[[_26:[^ ]+]] = load float, float addrspace(3)* %[[_25]], align 16 +// CHECK: %[[_27:[^ ]+]] = getelementptr [16 x float], [16 x float] addrspace(3)* @[[obj]], i32 0, i32 %{{.+}} +// CHECK: %[[_28:[^ ]+]] = load float, float addrspace(3)* %[[_27]], align 4 + +// CHECK: %[[_33:[^ ]+]] = bitcast float %[[_26]] to i32 +// CHECK: %[[_34:[^ ]+]] = bitcast float %[[_28]] to i32 + +// CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %{{.*}}, i32 %{{.+}}, i32 undef, i32 %[[_33]], i32 %[[_34]], i32 %{{.+}}, i32 %{{.+}}, i8 15) + +float4 rows[4]; + +void set_row(inout float4 row, uint i) { + row = rows[i]; +} + +class Obj { + float4x4 mat; + void set() { + set_row(mat[0], 0); + set_row(mat[1], 1); + set_row(mat[2], 2); + set_row(mat[3], 3); + } +}; + +RWByteAddressBuffer RWBuf; +groupshared Obj obj; + +[numthreads(4, 1, 1)] +void main(uint3 groupThreadID: SV_GroupThreadID) { + if (groupThreadID.x == 0) { + obj.set(); + } + GroupMemoryBarrierWithGroupSync(); + float4 row = obj.mat[groupThreadID.x]; + uint addr = groupThreadID.x * 4; + RWBuf.Store4(addr, uint4(asuint(row.x), asuint(row.y), asuint(row.z), asuint(row.w))); +} diff --git a/tools/clang/test/CodeGenHLSL/quick-test/static_global_copy3.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/static_global_copy3.hlsl index d04bf5760..b6a79e11b 100644 --- a/tools/clang/test/CodeGenHLSL/quick-test/static_global_copy3.hlsl +++ b/tools/clang/test/CodeGenHLSL/quick-test/static_global_copy3.hlsl @@ -17,7 +17,7 @@ A a; static A a2; -void set(A aa) { +void set(out A aa) { aa = a; } diff --git a/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod.hlsl new file mode 100644 index 000000000..762aa41b8 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s + +// Verify that modificaion of in-only struct parameter does not modify +// the value passed in by the caller. + +// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, +// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, +// CHECK-DAG: [[o1:%[^ ]*]] = fmul fast float [[p]], [[f]] +// CHECK-DAG: [[ret:%[^ ]*]] = fadd fast float [[o1]], [[p]] +// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]]) + +struct PayloadStruct { + float Color; +}; + +PayloadStruct MulPayload(in PayloadStruct Payload, in float x) +{ + Payload.Color *= x; + return Payload; +} + +void main(PayloadStruct p : Payload, + float f : INPUT, + out PayloadStruct o : SV_Target) { + + o = MulPayload(p, f); + o.Color += p.Color; +} diff --git a/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod2.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod2.hlsl new file mode 100644 index 000000000..989d54d0e --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/struct_param_in_mod2.hlsl @@ -0,0 +1,38 @@ +// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s + +// Verify that passing struct result of call as arg to another call does not +// generate extra call. + +// CHECK-DAG: [[f:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, +// CHECK-DAG: [[p:%[^ ]*]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, +// CHECK-DAG: [[factor:%[^ ]*]] = fmul fast float [[f]], 2.000000e+00 +// CHECK-DAG: [[factor2:%[^ ]*]] = fadd fast float [[factor]], 1.000000e+00 +// CHECK-DAG: [[ret:%[^ ]*]] = fmul fast float [[factor2]], [[p]] +// CHECK-DAG: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float [[ret]]) + +struct PayloadStruct { + float Color; +}; + +static float factor = 1.0; + +PayloadStruct MulPayload(in PayloadStruct Payload) +{ + Payload.Color *= factor; + factor += 1.0; + return Payload; +} + +PayloadStruct AddPayload(in PayloadStruct Payload0, in PayloadStruct Payload1) +{ + Payload0.Color += Payload1.Color; + return Payload0; +} + +void main(PayloadStruct p : Payload, + float f : INPUT, + out PayloadStruct OutputPayload : SV_Target) { + factor = f; + OutputPayload = AddPayload(MulPayload(p), + MulPayload(p)); +} diff --git a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl index b2ee9e913..cc0521db2 100644 --- a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl +++ b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten.hlsl @@ -1,7 +1,8 @@ // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s // Make sure function call on external function has correct type. -// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* nonnull {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}}) +// CHECK: call float @"\01?test_extern@@YAMUT@@Y01U1@U1@AIAV?$matrix@M$01$01@@@Z"(%struct.T* {{.*}}, [2 x %struct.T]* {{.*}}, %struct.T* {{.*}}, %class.matrix.float.2.2* dereferenceable(16) {{.*}}) + struct T { float a; float b; diff --git a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl index 07cddf5a2..93aadfa9a 100644 --- a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl +++ b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_arg_flatten3.hlsl @@ -2,7 +2,7 @@ // Make sure function call on external function has correct type. -// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull %tmp) #2 +// CHECK: call float @"\01?test_extern@@YAMUT@@@Z"(%struct.T* nonnull {{.*}}) #2 struct T { float a; diff --git a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl index f97d37ab5..9505bcd66 100644 --- a/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl +++ b/tools/clang/test/CodeGenHLSL/shader-compat-suite/lib_arg_flatten/lib_empty_struct_arg.hlsl @@ -1,7 +1,7 @@ // RUN: %dxc -T lib_6_3 -auto-binding-space 11 -default-linkage external %s | FileCheck %s -// Make sure empty struct arg works. -// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* %t) +// Make sure empty struct arg is replaced with undef. +// CHECK: call float @"\01?test@@YAMUT@@@Z"(%struct.T* undef) struct T { };