diff --git a/include/dxc/HLSL/HLMatrixLowerHelper.h b/include/dxc/HLSL/HLMatrixLowerHelper.h index 39a432319..de61be469 100644 --- a/include/dxc/HLSL/HLMatrixLowerHelper.h +++ b/include/dxc/HLSL/HLMatrixLowerHelper.h @@ -31,17 +31,27 @@ bool IsMatrixType(llvm::Type *Ty); DxilFieldAnnotation *FindAnnotationFromMatUser(llvm::Value *Mat, DxilTypeSystem &typeSys); // Translate matrix type to vector type. -llvm::Type *LowerMatrixType(llvm::Type *Ty); +llvm::Type *LowerMatrixType(llvm::Type *Ty, bool forMem = false); // TODO: use type annotation. llvm::Type *GetMatrixInfo(llvm::Type *Ty, unsigned &col, unsigned &row); // TODO: use type annotation. bool IsMatrixArrayPointer(llvm::Type *Ty); // Translate matrix array pointer type to vector array pointer type. -llvm::Type *LowerMatrixArrayPointer(llvm::Type *Ty); +llvm::Type *LowerMatrixArrayPointer(llvm::Type *Ty, bool forMem = false); llvm::Value *BuildVector(llvm::Type *EltTy, unsigned size, llvm::ArrayRef elts, llvm::IRBuilder<> &Builder); + +llvm::Value *VecMatrixMemToReg(llvm::Value *VecVal, llvm::Type *MatType, + llvm::IRBuilder<> &Builder); +llvm::Value *VecMatrixRegToMem(llvm::Value* VecVal, llvm::Type *MatType, + llvm::IRBuilder<> &Builder); +llvm::Instruction *CreateVecMatrixLoad(llvm::Value *VecPtr, + llvm::Type *MatType, llvm::IRBuilder<> &Builder); +llvm::Instruction *CreateVecMatrixStore(llvm::Value* VecVal, llvm::Value *VecPtr, + llvm::Type *MatType, llvm::IRBuilder<> &Builder); + // For case like mat[i][j]. // IdxList is [i][0], [i][1], [i][2],[i][3]. // Idx is j. diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index 40c35e8fe..4c736a9e3 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -69,7 +69,7 @@ DxilFieldAnnotation *FindAnnotationFromMatUser(Value *Mat, } // Translate matrix type to vector type. 
-Type *LowerMatrixType(Type *Ty) { +Type *LowerMatrixType(Type *Ty, bool forMem) { // Only translate matrix type and function type which use matrix type. // Not translate struct has matrix or matrix pointer. // Struct should be flattened before. @@ -84,6 +84,8 @@ Type *LowerMatrixType(Type *Ty) { } else if (IsMatrixType(Ty)) { unsigned row, col; Type *EltTy = GetMatrixInfo(Ty, col, row); + if (forMem && EltTy->isIntegerTy(1)) + EltTy = Type::getInt32Ty(Ty->getContext()); return VectorType::get(EltTy, row * col); } else { return Ty; @@ -122,7 +124,7 @@ bool IsMatrixArrayPointer(llvm::Type *Ty) { Ty = Ty->getArrayElementType(); return IsMatrixType(Ty); } -Type *LowerMatrixArrayPointer(Type *Ty) { +Type *LowerMatrixArrayPointer(Type *Ty, bool forMem) { unsigned addrSpace = Ty->getPointerAddressSpace(); Ty = Ty->getPointerElementType(); std::vector arraySizeList; @@ -130,7 +132,7 @@ Type *LowerMatrixArrayPointer(Type *Ty) { arraySizeList.push_back(Ty->getArrayNumElements()); Ty = Ty->getArrayElementType(); } - Ty = LowerMatrixType(Ty); + Ty = LowerMatrixType(Ty, forMem); for (auto arraySize = arraySizeList.rbegin(); arraySize != arraySizeList.rend(); arraySize++) @@ -155,13 +157,69 @@ Type *LowerMatrixArrayPointerToOneDimArray(Type *Ty) { return PointerType::get(Ty, addrSpace); } Value *BuildVector(Type *EltTy, unsigned size, ArrayRef elts, - IRBuilder<> &Builder) { + IRBuilder<> &Builder) { Value *Vec = UndefValue::get(VectorType::get(EltTy, size)); for (unsigned i = 0; i < size; i++) Vec = Builder.CreateInsertElement(Vec, elts[i], i); return Vec; } +llvm::Value *VecMatrixMemToReg(llvm::Value *VecVal, llvm::Type *MatType, + llvm::IRBuilder<> &Builder) +{ + llvm::Type *VecMatRegTy = HLMatrixLower::LowerMatrixType(MatType, /*forMem*/false); + if (VecVal->getType() == VecMatRegTy) { + return VecVal; + } + + DXASSERT(VecMatRegTy->getVectorElementType()->isIntegerTy(1), + "Vector matrix mem to reg type mismatch should only happen for bools."); + llvm::Type *VecMatMemTy = 
HLMatrixLower::LowerMatrixType(MatType, /*forMem*/true); + return Builder.CreateICmpNE(VecVal, Constant::getNullValue(VecMatMemTy)); +} + +llvm::Value *VecMatrixRegToMem(llvm::Value* VecVal, llvm::Type *MatType, + llvm::IRBuilder<> &Builder) +{ + llvm::Type *VecMatMemTy = HLMatrixLower::LowerMatrixType(MatType, /*forMem*/true); + if (VecVal->getType() == VecMatMemTy) { + return VecVal; + } + + DXASSERT(VecVal->getType()->getVectorElementType()->isIntegerTy(1), + "Vector matrix reg to mem type mismatch should only happen for bools."); + return Builder.CreateZExt(VecVal, VecMatMemTy); +} + +llvm::Instruction *CreateVecMatrixLoad( + llvm::Value *VecPtr, llvm::Type *MatType, llvm::IRBuilder<> &Builder) +{ + llvm::Instruction *VecVal = Builder.CreateLoad(VecPtr); + return cast(VecMatrixMemToReg(VecVal, MatType, Builder)); +} + +llvm::Instruction *CreateVecMatrixStore(llvm::Value* VecVal, llvm::Value *VecPtr, + llvm::Type *MatType, llvm::IRBuilder<> &Builder) +{ + llvm::Type *VecMatMemTy = HLMatrixLower::LowerMatrixType(MatType, /*forMem*/true); + if (VecVal->getType() == VecMatMemTy) { + return Builder.CreateStore(VecVal, VecPtr); + } + + // We need to convert to the memory representation, and we want to return + // the conversion instruction rather than the store since that's what + // accepts the register-typed i1 values. + + // Do not use VecMatrixRegToMem as it may constant fold the conversion + // instruction, which is what we want to return. 
+ DXASSERT(VecVal->getType()->getVectorElementType()->isIntegerTy(1), + "Vector matrix reg to mem type mismatch should only happen for bools."); + + llvm::Instruction *ConvInst = Builder.Insert(new ZExtInst(VecVal, VecMatMemTy)); + Builder.CreateStore(ConvInst, VecPtr); + return ConvInst; +} + Value *LowerGEPOnMatIndexListToIndex( llvm::GetElementPtrInst *GEP, ArrayRef IdxList) { IRBuilder<> Builder(GEP); @@ -508,7 +566,7 @@ Instruction *HLMatrixLowerPass::MatLdStToVec(CallInst *CI) { if (isa(matPtr) || GetIfMatrixGEPOfUDTAlloca(matPtr) || GetIfMatrixGEPOfUDTArg(matPtr, *m_pHLModule)) { Value *vecPtr = matToVecMap[cast(matPtr)]; - result = Builder.CreateLoad(vecPtr); + result = CreateVecMatrixLoad(vecPtr, matPtr->getType()->getPointerElementType(), Builder); } else result = MatIntrinsicToVec(CI); } break; @@ -519,9 +577,8 @@ Instruction *HLMatrixLowerPass::MatLdStToVec(CallInst *CI) { GetIfMatrixGEPOfUDTArg(matPtr, *m_pHLModule)) { Value *vecPtr = matToVecMap[cast(matPtr)]; Value *matVal = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); - Value *vecVal = - UndefValue::get(HLMatrixLower::LowerMatrixType(matVal->getType())); - result = Builder.CreateStore(vecVal, vecPtr); + Value *vecVal = UndefValue::get(HLMatrixLower::LowerMatrixType(matVal->getType())); + result = CreateVecMatrixStore(vecVal, vecPtr, matVal->getType(), Builder); } else result = MatIntrinsicToVec(CI); } break; @@ -905,11 +962,11 @@ void HLMatrixLowerPass::lowerToVec(Instruction *matInst) { IRBuilder<> AllocaBuilder(AI); if (Ty->isArrayTy()) { - Type *vecTy = HLMatrixLower::LowerMatrixArrayPointer(AI->getType()); + Type *vecTy = HLMatrixLower::LowerMatrixArrayPointer(AI->getType(), /*forMem*/ true); vecTy = vecTy->getPointerElementType(); vecVal = AllocaBuilder.CreateAlloca(vecTy, nullptr, AI->getName()); } else { - Type *vecTy = HLMatrixLower::LowerMatrixType(matTy); + Type *vecTy = HLMatrixLower::LowerMatrixType(matTy, /*forMem*/ true); vecVal = AllocaBuilder.CreateAlloca(vecTy, nullptr, 
AI->getName()); } // Update debug info. @@ -2059,7 +2116,8 @@ void HLMatrixLowerPass::TranslateMatArrayGEP(Value *matInst, // Skip the vector version. if (useCall->getType()->isVectorTy()) continue; - Value *newLd = Builder.CreateLoad(newGEP); + Type *matTy = useCall->getType(); + Value *newLd = CreateVecMatrixLoad(newGEP, matTy, Builder); DXASSERT(matToVecMap.count(useCall), "must have vec version"); Value *oldLd = matToVecMap[useCall]; // Delete the oldLd. @@ -2082,7 +2140,7 @@ void HLMatrixLowerPass::TranslateMatArrayGEP(Value *matInst, DXASSERT(matToVecMap.count(matInst), "must have vec version"); Value *vecVal = matToVecMap[matInst]; - Builder.CreateStore(vecVal, vecPtr); + CreateVecMatrixStore(vecVal, vecPtr, matVal->getType(), Builder); } break; } } break; @@ -2174,9 +2232,17 @@ void HLMatrixLowerPass::replaceMatWithVec(Value *matVal, // Load Already translated in lowerToVec. // Store val operand will be set by the val use. // Do nothing here. - } else if (StoreInst *stInst = dyn_cast(vecUser)) + } else if (StoreInst *stInst = dyn_cast(vecUser)) { + DXASSERT(vecVal->getType() == stInst->getValueOperand()->getType(), + "Mismatched vector matrix store value types."); stInst->setOperand(0, vecVal); - else + } else if (ZExtInst *zextInst = dyn_cast(vecUser)) { + // This happens when storing bool matrices, + // which must first undergo conversion from i1's to i32's. 
+ DXASSERT(vecVal->getType() == zextInst->getOperand(0)->getType(), + "Mismatched vector matrix store value types."); + zextInst->setOperand(0, vecVal); + } else TrivialMatReplace(matVal, vecVal, useCall); } break; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index b0426f501..b2db0646a 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4988,7 +4988,8 @@ Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset, bool colMajor, OP *OP, const DataLayout &DL, IRBuilder<> &Builder) { unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row); + HLMatrixLower::GetMatrixInfo(matType, col, row); + Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/true)->getVectorElementType(); unsigned matSize = col * row; std::vector elts(matSize); Value *EltByteSize = ConstantInt::get( @@ -5001,7 +5002,9 @@ Value *TranslateConstBufMatLd(Type *matType, Value *handle, Value *offset, baseOffset = Builder.CreateAdd(baseOffset, EltByteSize); } - return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder); + Value* Vec = HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder); + Vec = HLMatrixLower::VecMatrixMemToReg(Vec, matType, Builder); + return Vec; } void TranslateCBGep(GetElementPtrInst *GEP, Value *handle, Value *baseOffset, @@ -5417,10 +5420,11 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx, Value *TranslateConstBufMatLdLegacy(Type *matType, Value *handle, Value *legacyIdx, bool colMajor, OP *OP, - const DataLayout &DL, + bool memElemRepr, const DataLayout &DL, IRBuilder<> &Builder) { unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row); + HLMatrixLower::GetMatrixInfo(matType, col, row); + Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/memElemRepr)->getVectorElementType(); unsigned matSize = col * row; std::vector elts(matSize); @@ -5506,8 +5510,9 @@ void TranslateCBAddressUserLegacy(Instruction 
*user, Value *handle, Type *matType = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) ->getType() ->getPointerElementType(); + // This will replace a call, so we should use the register representation of elements Value *newLd = TranslateConstBufMatLdLegacy( - matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder); + matType, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/false, DL, Builder); CI->replaceAllUsesWith(newLd); CI->eraseFromParent(); } else if (group == HLOpcodeGroup::HLSubscript) { @@ -5534,8 +5539,9 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle, Value *ldData = UndefValue::get(resultType); if (!dynamicIndexing) { + // This will replace a load or GEP, so we should use the memory representation of elements Value *matLd = TranslateConstBufMatLdLegacy( - matType, handle, legacyIdx, colMajor, hlslOP, DL, Builder); + matType, handle, legacyIdx, colMajor, hlslOP, /*memElemRepr*/true, DL, Builder); // The matLd is keep original layout, just use the idx calc in // EmitHLSLMatrixElement and EmitHLSLMatrixSubscript. 
switch (subOp) { @@ -6022,7 +6028,8 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, Value *bufIdx, Value *baseOffset, bool colMajor, const DataLayout &DL) { unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row); + HLMatrixLower::GetMatrixInfo(matType, col, row); + Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/true)->getVectorElementType(); unsigned EltSize = DL.getTypeAllocSize(EltTy); Constant* alignment = OP->GetI32Const(EltSize); @@ -6054,14 +6061,20 @@ Value *TranslateStructBufMatLd(Type *matType, IRBuilder<> &Builder, offset = Builder.CreateAdd(offset, OP->GetU32Const(4 * EltSize)); } - return HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder); + Value *Vec = HLMatrixLower::BuildVector(EltTy, col * row, elts, Builder); + Vec = HLMatrixLower::VecMatrixMemToReg(Vec, matType, Builder); + return Vec; } void TranslateStructBufMatSt(Type *matType, IRBuilder<> &Builder, Value *handle, hlsl::OP *OP, Value *bufIdx, Value *baseOffset, Value *val, bool colMajor, const DataLayout &DL) { unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matType, col, row); + HLMatrixLower::GetMatrixInfo(matType, col, row); + Type *EltTy = HLMatrixLower::LowerMatrixType(matType, /*forMem*/true)->getVectorElementType(); + + val = HLMatrixLower::VecMatrixRegToMem(val, matType, Builder); + unsigned EltSize = DL.getTypeAllocSize(EltTy); Constant *Alignment = OP->GetI32Const(EltSize); Value *offset = baseOffset; diff --git a/lib/HLSL/HLSignatureLower.cpp b/lib/HLSL/HLSignatureLower.cpp index 648d775db..3228d487c 100644 --- a/lib/HLSL/HLSignatureLower.cpp +++ b/lib/HLSL/HLSignatureLower.cpp @@ -912,80 +912,54 @@ void GenerateInputOutputUserCall(InputOutputAccessInfo &info, Value *undefVertex DXASSERT_NOMSG(group == HLOpcodeGroup::HLMatLoadStore); HLMatLoadStoreOpcode matOp = static_cast(opcode); switch (matOp) { - case HLMatLoadStoreOpcode::ColMatLoad: { - IRBuilder<> LocalBuilder(CI); - Type *matTy = 
CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) - ->getType() - ->getPointerElementType(); - unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matTy, col, row); - std::vector matElts(col * row); - for (unsigned c = 0; c < col; c++) { - Constant *constRowIdx = LocalBuilder.getInt32(c); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); - for (unsigned r = 0; r < row; r++) { - SmallVector args = {OpArg, ID, rowIdx, columnConsts[r]}; - if (vertexID) - args.emplace_back(vertexID); - - Value *input = LocalBuilder.CreateCall(ldStFunc, args); - unsigned matIdx = c * row + r; - matElts[matIdx] = input; - } - } - Value *newVec = - HLMatrixLower::BuildVector(EltTy, col * row, matElts, LocalBuilder); - CI->replaceAllUsesWith(newVec); - CI->eraseFromParent(); - } break; + case HLMatLoadStoreOpcode::ColMatLoad: case HLMatLoadStoreOpcode::RowMatLoad: { IRBuilder<> LocalBuilder(CI); Type *matTy = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) ->getType() ->getPointerElementType(); unsigned col, row; - Type *EltTy = HLMatrixLower::GetMatrixInfo(matTy, col, row); + HLMatrixLower::GetMatrixInfo(matTy, col, row); std::vector matElts(col * row); - for (unsigned r = 0; r < row; r++) { - Constant *constRowIdx = LocalBuilder.getInt32(r); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); - for (unsigned c = 0; c < col; c++) { - SmallVector args = {OpArg, ID, rowIdx, columnConsts[c]}; - if (vertexID) - args.emplace_back(vertexID); - Value *input = LocalBuilder.CreateCall(ldStFunc, args); - unsigned matIdx = r * col + c; - matElts[matIdx] = input; + if (matOp == HLMatLoadStoreOpcode::ColMatLoad) { + for (unsigned c = 0; c < col; c++) { + Constant *constRowIdx = LocalBuilder.getInt32(c); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned r = 0; r < row; r++) { + SmallVector args = { OpArg, ID, rowIdx, columnConsts[r] }; + if (vertexID) + args.emplace_back(vertexID); + + Value *input = 
LocalBuilder.CreateCall(ldStFunc, args); + unsigned matIdx = c * row + r; + matElts[matIdx] = input; + } + } + } else { + for (unsigned r = 0; r < row; r++) { + Constant *constRowIdx = LocalBuilder.getInt32(r); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned c = 0; c < col; c++) { + SmallVector args = { OpArg, ID, rowIdx, columnConsts[c] }; + if (vertexID) + args.emplace_back(vertexID); + + Value *input = LocalBuilder.CreateCall(ldStFunc, args); + unsigned matIdx = r * col + c; + matElts[matIdx] = input; + } } } + Value *newVec = - HLMatrixLower::BuildVector(EltTy, col * row, matElts, LocalBuilder); + HLMatrixLower::BuildVector(matElts[0]->getType(), col * row, matElts, LocalBuilder); + newVec = HLMatrixLower::VecMatrixMemToReg(newVec, matTy, LocalBuilder); + CI->replaceAllUsesWith(newVec); CI->eraseFromParent(); } break; - case HLMatLoadStoreOpcode::ColMatStore: { - IRBuilder<> LocalBuilder(CI); - Value *Val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); - Type *matTy = CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx) - ->getType() - ->getPointerElementType(); - unsigned col, row; - HLMatrixLower::GetMatrixInfo(matTy, col, row); - - for (unsigned c = 0; c < col; c++) { - Constant *constColIdx = LocalBuilder.getInt32(c); - Value *colIdx = LocalBuilder.CreateAdd(idxVal, constColIdx); - - for (unsigned r = 0; r < row; r++) { - unsigned matIdx = HLMatrixLower::GetColMajorIdx(r, c, row); - Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); - LocalBuilder.CreateCall(ldStFunc, - {OpArg, ID, colIdx, columnConsts[r], Elt}); - } - } - CI->eraseFromParent(); - } break; + case HLMatLoadStoreOpcode::ColMatStore: case HLMatLoadStoreOpcode::RowMatStore: { IRBuilder<> LocalBuilder(CI); Value *Val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); @@ -995,14 +969,30 @@ void GenerateInputOutputUserCall(InputOutputAccessInfo &info, Value *undefVertex unsigned col, row; HLMatrixLower::GetMatrixInfo(matTy, col, row); - for 
(unsigned r = 0; r < row; r++) { - Constant *constRowIdx = LocalBuilder.getInt32(r); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + Val = HLMatrixLower::VecMatrixRegToMem(Val, matTy, LocalBuilder); + + if (matOp == HLMatLoadStoreOpcode::ColMatStore) { for (unsigned c = 0; c < col; c++) { - unsigned matIdx = HLMatrixLower::GetRowMajorIdx(r, c, col); - Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); - LocalBuilder.CreateCall(ldStFunc, - {OpArg, ID, rowIdx, columnConsts[c], Elt}); + Constant *constColIdx = LocalBuilder.getInt32(c); + Value *colIdx = LocalBuilder.CreateAdd(idxVal, constColIdx); + + for (unsigned r = 0; r < row; r++) { + unsigned matIdx = HLMatrixLower::GetColMajorIdx(r, c, row); + Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); + LocalBuilder.CreateCall(ldStFunc, + { OpArg, ID, colIdx, columnConsts[r], Elt }); + } + } + } else { + for (unsigned r = 0; r < row; r++) { + Constant *constRowIdx = LocalBuilder.getInt32(r); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned c = 0; c < col; c++) { + unsigned matIdx = HLMatrixLower::GetRowMajorIdx(r, c, col); + Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); + LocalBuilder.CreateCall(ldStFunc, + { OpArg, ID, rowIdx, columnConsts[c], Elt }); + } } } CI->eraseFromParent(); diff --git a/tools/clang/lib/AST/ASTContext.cpp b/tools/clang/lib/AST/ASTContext.cpp index a695e9da5..af88bfa50 100644 --- a/tools/clang/lib/AST/ASTContext.cpp +++ b/tools/clang/lib/AST/ASTContext.cpp @@ -1568,7 +1568,6 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { // Vector align to its element. if (getLangOpts().HLSL) { Align = EltInfo.Align; - Width = Align * VT->getNumElements(); } // HLSL Change Ends. // If the alignment is not a power of 2, round up to the next power of 2. 
diff --git a/tools/clang/lib/Basic/Targets.cpp b/tools/clang/lib/Basic/Targets.cpp index fb9af0323..71e08a4e7 100644 --- a/tools/clang/lib/Basic/Targets.cpp +++ b/tools/clang/lib/Basic/Targets.cpp @@ -6991,9 +6991,7 @@ public: LongWidth = LongAlign = 32; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble; - BoolWidth = 32; - // To avoid member for alignment. - BoolAlign = 8; + BoolWidth = BoolAlign = 32; // using the Microsoft ABI. TheCXXABI.set(TargetCXXABI::Microsoft); diff --git a/tools/clang/lib/CodeGen/CGExpr.cpp b/tools/clang/lib/CodeGen/CGExpr.cpp index f23aa37f0..422feab4a 100644 --- a/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/tools/clang/lib/CodeGen/CGExpr.cpp @@ -1082,6 +1082,14 @@ static bool hasBooleanRepresentation(QualType Ty) { return false; } +// HLSL Change Begin. +static bool hasBooleanScalarOrVectorRepresentation(QualType Ty) { + if (hlsl::IsHLSLVecType(Ty)) + return hasBooleanRepresentation(hlsl::GetElementTypeOrType(Ty)); + return hasBooleanRepresentation(Ty); +} +// HLSL Change End. + static bool getRangeForType(CodeGenFunction &CGF, QualType Ty, llvm::APInt &Min, llvm::APInt &End, bool StrictEnums) { @@ -1233,30 +1241,31 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile, } llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { - // Bool has a different representation in memory than in registers. - if (hasBooleanRepresentation(Ty)) { + // HLSL Change Begin. + // Bool scalar and vectors have a different representation in memory than in registers. + if (hasBooleanScalarOrVectorRepresentation(Ty)) { // This should really always be an i1, but sometimes it's already // an i8, and it's awkward to track those cases down. - if (Value->getType()->isIntegerTy(1)) + llvm::Type *ValTy = Value->getType(); + llvm::Type *VecElemTy = ValTy->isVectorTy() ? 
ValTy->getVectorElementType() : ValTy; + if (VecElemTy->isIntegerTy(1)) return Builder.CreateZExt(Value, ConvertTypeForMem(Ty), "frombool"); - assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && - "wrong value rep of bool"); } + // HLSL Change End. return Value; } llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { - // Bool has a different representation in memory than in registers. - if (hasBooleanRepresentation(Ty)) { - assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && - "wrong value rep of bool"); - // HLSL Change Begin. + // HLSL Change Begin. + // Bool scalar and vectors have a different representation in memory than in registers. + if (hasBooleanScalarOrVectorRepresentation(Ty)) { + llvm::Type *ValTy = Value->getType(); // Use ne v, 0 to convert to i1 instead of trunc. return Builder.CreateICmpNE( - Value, llvm::ConstantInt::get(Value->getType(), 0), "tobool"); - // HLSL Change End. + Value, llvm::ConstantVector::getNullValue(ValTy), "tobool"); } + // HLSL Change End. 
return Value; } @@ -1475,6 +1484,8 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { Load->setAlignment(LV.getAlignment().getQuantity()); llvm::Value *Vec = Load; + Vec = EmitFromMemory(Vec, LV.getType()); // HLSL Change + const llvm::Constant *Elts = LV.getExtVectorElts(); // If the result of the expression is a non-vector type, we must be extracting @@ -1748,7 +1759,10 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, const llvm::Constant *Elts = Dst.getExtVectorElts(); llvm::Value *SrcVal = Src.getScalarVal(); + // HLSL Change Starts + SrcVal = EmitToMemory(SrcVal, Dst.getType()); + const VectorType *VTy = Dst.getType()->getAs(); if (VTy == nullptr && getContext().getLangOpts().HLSL) VTy = @@ -2918,11 +2932,12 @@ CodeGenFunction::EmitHLSLVectorElementExpr(const HLSLVectorElementExpr *E) { assert(hlsl::IsHLSLVecType(E->getBase()->getType()) && "Result must be a vector"); llvm::Value *Vec = EmitScalarExpr(E->getBase()); + Vec = EmitToMemory(Vec, E->getBase()->getType()); // Store the vector to memory (because LValue wants an address). - llvm::Value *VecMem = CreateMemTemp(E->getBase()->getType()); - Builder.CreateStore(Vec, VecMem); - Base = MakeAddrLValue(VecMem, E->getBase()->getType()); + llvm::Value *VecMemPtr = CreateMemTemp(E->getBase()->getType()); + Builder.CreateStore(Vec, VecMemPtr); + Base = MakeAddrLValue(VecMemPtr, E->getBase()->getType()); } QualType type = diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index b71a9983d..66c0d79d0 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -3838,6 +3838,15 @@ static Value *CastLdValue(Value *Ptr, llvm::Type *FromTy, llvm::Type *ToTy, IRBu // Change scalar into vec1. 
Value *Vec1 = UndefValue::get(ToTy); return Builder.CreateInsertElement(Vec1, V, (uint64_t)0); + } else if (vecSize == 1 && FromTy->isIntegerTy() + && ToTy->getVectorElementType()->isIntegerTy(1)) { + // load(bitcast i32* to <1 x i1>*) + // Rewrite to + // insertelement(icmp ne (load i32*), 0) + Value *IntV = Builder.CreateLoad(Ptr); + Value *BoolV = Builder.CreateICmpNE(IntV, ConstantInt::get(IntV->getType(), 0), "tobool"); + Value *Vec1 = UndefValue::get(ToTy); + return Builder.CreateInsertElement(Vec1, BoolV, (uint64_t)0); } else if (FromTy->isVectorTy() && vecSize == 1) { Value *V = Builder.CreateLoad(Ptr); // VectorTrunc @@ -5468,15 +5477,20 @@ static void AddMissingCastOpsInInitList(SmallVector &elts, SmallVect static void StoreInitListToDestPtr(Value *DestPtr, SmallVector &elts, unsigned &idx, - QualType Type, CodeGenTypes &Types, bool bDefaultRowMajor, - CGBuilderTy &Builder, llvm::Module &M) { + QualType Type, bool bDefaultRowMajor, + CodeGenFunction &CGF, llvm::Module &M) { + CodeGenTypes &Types = CGF.getTypes(); + CGBuilderTy &Builder = CGF.Builder; + llvm::Type *Ty = DestPtr->getType()->getPointerElementType(); llvm::Type *i32Ty = llvm::Type::getInt32Ty(Ty->getContext()); if (Ty->isVectorTy()) { - Value *Result = UndefValue::get(Ty); - for (unsigned i = 0; i < Ty->getVectorNumElements(); i++) + llvm::Type *RegTy = CGF.ConvertType(Type); + Value *Result = UndefValue::get(RegTy); + for (unsigned i = 0; i < RegTy->getVectorNumElements(); i++) Result = Builder.CreateInsertElement(Result, elts[idx + i], i); + Result = CGF.EmitToMemory(Result, Type); Builder.CreateStore(Result, DestPtr); idx += Ty->getVectorNumElements(); } else if (HLMatrixLower::IsMatrixType(Ty)) { @@ -5541,8 +5555,8 @@ static void StoreInitListToDestPtr(Value *DestPtr, unsigned i = RL.getNonVirtualBaseLLVMFieldNo(BaseDecl); Constant *gepIdx = ConstantInt::get(i32Ty, i); Value *GEP = Builder.CreateInBoundsGEP(DestPtr, {zero, gepIdx}); - StoreInitListToDestPtr(GEP, elts, idx, parentTy, 
Types, - bDefaultRowMajor, Builder, M); + StoreInitListToDestPtr(GEP, elts, idx, parentTy, + bDefaultRowMajor, CGF, M); } } } @@ -5550,8 +5564,8 @@ static void StoreInitListToDestPtr(Value *DestPtr, unsigned i = RL.getLLVMFieldNo(field); Constant *gepIdx = ConstantInt::get(i32Ty, i); Value *GEP = Builder.CreateInBoundsGEP(DestPtr, {zero, gepIdx}); - StoreInitListToDestPtr(GEP, elts, idx, field->getType(), Types, - bDefaultRowMajor, Builder, M); + StoreInitListToDestPtr(GEP, elts, idx, field->getType(), + bDefaultRowMajor, CGF, M); } } } else if (Ty->isArrayTy()) { @@ -5560,8 +5574,8 @@ static void StoreInitListToDestPtr(Value *DestPtr, for (unsigned i = 0; i < Ty->getArrayNumElements(); i++) { Constant *gepIdx = ConstantInt::get(i32Ty, i); Value *GEP = Builder.CreateInBoundsGEP(DestPtr, {zero, gepIdx}); - StoreInitListToDestPtr(GEP, elts, idx, EltType, Types, bDefaultRowMajor, - Builder, M); + StoreInitListToDestPtr(GEP, elts, idx, EltType, bDefaultRowMajor, + CGF, M); } } else { DXASSERT(Ty->isSingleValueType(), "invalid type"); @@ -5741,8 +5755,8 @@ Value *CGMSHLSLRuntime::EmitHLSLInitListExpr(CodeGenFunction &CGF, InitListExpr ParamList.append(EltValList.begin(), EltValList.end()); idx = 0; bool bDefaultRowMajor = m_pHLModule->GetHLOptions().bDefaultRowMajor; - StoreInitListToDestPtr(DestPtr, EltValList, idx, ResultTy, CGF.getTypes(), - bDefaultRowMajor, CGF.Builder, TheModule); + StoreInitListToDestPtr(DestPtr, EltValList, idx, ResultTy, + bDefaultRowMajor, CGF, TheModule); return nullptr; } @@ -7077,15 +7091,10 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit( BasicBlock *InsertBlock = CGF.Builder.GetInsertBlock(); Function *F = InsertBlock->getParent(); - if (ParamTy->isBooleanType()) { - // Create i32 for bool. - ParamTy = CGM.getContext().IntTy; - } // Make sure the alloca is in entry block to stop inline create stacksave. 
IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F)); - tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertType(ParamTy)); + tmpArgAddr = AllocaBuilder.CreateAlloca(CGF.ConvertTypeForMem(ParamTy)); - // add it to local decl map TmpArgMap(tmpArg, tmpArgAddr); @@ -7164,6 +7173,8 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionCopyBack( else outVal = EmitHLSLMatrixLoad(CGF, tmpArgAddr, ParamTy); + outVal = CGF.EmitFromMemory(outVal, ParamTy); + llvm::Type *ToTy = CGF.ConvertType(ArgTy); llvm::Type *FromTy = outVal->getType(); Value *castVal = outVal; diff --git a/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/tools/clang/lib/CodeGen/CodeGenTypes.cpp index 425c35ff2..5641cfc8a 100644 --- a/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -109,15 +109,26 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { + // HLSL Change Starts + if (hlsl::IsHLSLVecType(T)) { + // Vectors of bools in memory should become vectors of + // the memory representation of the elements. + // Clang doesn't do this for plain VectorTypes, + // which is fine otherwise a bool1x1 matrix would become + // [n x ] since array elements always have memory representation. + QualType ElemT = hlsl::GetElementTypeOrType(T); + return llvm::VectorType::get(ConvertTypeForMem(ElemT), hlsl::GetHLSLVecSize(T)); + } + llvm::Type *R = ConvertType(T); - // If this is a non-bool type, don't map it. - if (!R->isIntegerTy(1)) - return R; + if (R->isIntegerTy(1)) { + // Bools have a different representation in memory + return llvm::IntegerType::get(getLLVMContext(), (unsigned)Context.getTypeSize(T)); + } - // Otherwise, return an integer of the target-specified size. 
- return llvm::IntegerType::get(getLLVMContext(), - (unsigned)Context.getTypeSize(T)); + return R; + // HLSL Change Ends } diff --git a/tools/clang/test/CodeGenHLSL/RValSubscript.hlsl b/tools/clang/test/CodeGenHLSL/RValSubscript.hlsl index 231a55cb8..51432e370 100644 --- a/tools/clang/test/CodeGenHLSL/RValSubscript.hlsl +++ b/tools/clang/test/CodeGenHLSL/RValSubscript.hlsl @@ -5,7 +5,7 @@ // CHECK: i32 5) // CHECK: extractvalue // CHECK: , 2 -// CHECK: icmp eq +// CHECK: icmp ne // CHECK 0 // For (x4 < 3)[1] @@ -47,7 +47,7 @@ // CHECK: fcmp fast oeq // CHECK: fcmp fast oeq // CHECK: fcmp fast oeq -// CHECK: alloca [16 x i1] +// CHECK: alloca [16 x i32] float4x4 xt; diff --git a/tools/clang/test/CodeGenHLSL/quick-test/bool_loadbuf_storebuf_memrepr.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/bool_loadbuf_storebuf_memrepr.hlsl new file mode 100644 index 000000000..e566a1f0e --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/bool_loadbuf_storebuf_memrepr.hlsl @@ -0,0 +1,110 @@ +// RUN: %dxc -E main -T ps_6_0 -O0 %s | FileCheck %s + +// Ensure that bools are converted from/to their mem representation when loaded/stored in buffers + +// Constant buffer loads +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32 +// CHECK: 
extractvalue %dx.types.CBufRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 + +// Structured buffer loads +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32 +// CHECK: extractvalue %dx.types.ResRet.i32 +// CHECK: icmp ne i32 {{.*}}, 0 + +// Structured buffer stores +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: call void @dx.op.bufferStore.i32 + + +struct AllTheBools +{ + bool2x2 m; + bool2 v; + bool s; + bool2x2 ma[2]; + bool2 va[2]; + bool sa[2]; +}; + +ConstantBuffer cb; +StructuredBuffer sb; +RWStructuredBuffer rwsb; + +float main(int i : I) : SV_Target +{ + float result = 0; + + // Constant buffer loads + if (cb.m._22 && cb.v.y && cb.s + && cb.ma[1]._22 && cb.va[1].y && cb.sa[1]) + { + result++; + } + + // 
Structured buffer loads + if (sb[0].m._22 && sb[0].v.y && sb[0].s + && sb[0].ma[1]._22 && sb[0].va[1].y && sb[0].sa[1]) + { + result++; + } + + // Structured buffer stores + if (result >= 1.0f) + { + rwsb[0].m._22 = i == 42; + rwsb[0].v.y = i == 42; + rwsb[0].s = i == 42; + rwsb[0].ma[1]._22 = i == 42; + rwsb[0].va[1].y = i == 42; + rwsb[0].sa[1] = i == 42; + } + + return 0; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/quick-test/bool_memrepr.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/bool_memrepr.hlsl new file mode 100644 index 000000000..20d7ce08f --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/bool_memrepr.hlsl @@ -0,0 +1,52 @@ +// RUN: %dxc -E main -T ps_6_0 -O0 %s | FileCheck %s + +// Ensure that bools are converted from/to their memory representation when loaded/stored + +// Local variables should never be i1s +// CHECK-NOT: alloca {{.*}}i1 + +// Test stores +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 +// CHECK: icmp eq i32 {{.*}}, 42 +// CHECK: zext i1 {{.*}} to i32 +// CHECK: store i32 + +// Test loads +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: load i32 +// CHECK: icmp ne i32 {{.*}}, 0 + +float main(int i : I) : SV_Target +{ + bool s = i == 42; + bool1 v = i == 42; + bool1x1 m = i == 42; + bool sa[1] = { i == 42 }; + bool1 va[1] = { i == 42 }; + bool1x1 ma[1] = { i == 42 }; + + return s && v.x && 
m._11 && sa[0] && va[0].x && ma[0]._11 ? 1.0f : 2.0f; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/quick-test/bool_scalar_swizzle.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/bool_scalar_swizzle.hlsl new file mode 100644 index 000000000..9499f3ddc --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/bool_scalar_swizzle.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -E main -T ps_6_0 -O0 %s | FileCheck %s + +// This is mostly a regression test for a bug where a bitcast +// from i32* to i1* was emitted. + +// CHECK: alloca i32 +// CHECK: alloca [2 x i32] +// CHECK-NOT: bitcast + +float main() : SV_Target +{ + bool b = true; + bool2 b2 = b.xx; + return 0; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/quick-test/bool_stress.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/bool_stress.hlsl new file mode 100644 index 000000000..aacf9fd85 --- /dev/null +++ b/tools/clang/test/CodeGenHLSL/quick-test/bool_stress.hlsl @@ -0,0 +1,54 @@ +// RUN: %dxc -E main -T vs_6_0 -O0 %s + +// Regression test for compiler crashes in complex bool cases + +struct AllTheBools +{ + bool b : B; + bool ba2[2] :BA2; + bool1 b1 : B1; + bool3 b3 : B3; + bool3 b3a2[2] : B3A2; + bool1x1 b1x1 : B1X1; + bool2x3 b2x3 : B2X3; + row_major bool2x3 rmb2x3 : RMB2X3; + bool2x3 b2x3a2[2] : B2X3A2; +}; + +ConstantBuffer cb; +StructuredBuffer sb; + +void not(in out bool value) { value = !value; } + +void not(in out bool2 value) +{ + value = !value; + not(value.x); + not(value.y); +} + +void not(in out bool3 value) +{ + not(value.xz); + not(value.y); +} + +AllTheBools main(AllTheBools input, float f : F) +{ + AllTheBools output; + output.b = input.b ? 
cb.b : sb[0].b; + output.ba2[1] = input.b; + output.ba2[0] = input.ba2[1]; + output.b1 = input.b3.y; + output.b3 = input.b.xxx; + output.b3a2 = sb[0].b3a2; + if (sb[0].b) return cb; + + output.b1x1 = cb.b2x3._22; + output.b2x3 = bool2x3(sb[0].b3, bool3(f > 2, input.b, false)); + output.rmb2x3 = input.b2x3; + not(output.rmb2x3[0]); + output.b2x3a2[1] = cb.b2x3; + output.b2x3a2[0] = input.b2x3; + return output; +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenHLSL/quick-test/global-var-write-test04.hlsl b/tools/clang/test/CodeGenHLSL/quick-test/global-var-write-test04.hlsl index a0440c733..a3392a0b5 100644 --- a/tools/clang/test/CodeGenHLSL/quick-test/global-var-write-test04.hlsl +++ b/tools/clang/test/CodeGenHLSL/quick-test/global-var-write-test04.hlsl @@ -7,7 +7,7 @@ // CHECK: {{.*g_v.*}} = external constant <4 x float>, align 4 // CHECK: {{.*g_m1.*}} = external constant %class.matrix.int.2.2, align 4 // CHECK: {{.*g_m2.*}} = external constant %class.matrix.int.2.2, align 4 -// CHECK: {{.*g_b.*}} = external constant i32, align 1 +// CHECK: {{.*g_b.*}} = external constant i32, align 4 // CHECK: {{.*g_a.*}} = external constant [5 x i32], align 4 // CHECK: {{.*g_a2d.*}} = external constant [3 x [2 x i32]], align 4 // CHECK-NOT: {{(.*g_s1.*)(.*static.copy.*)}} = internal global float 0.000000e+00