diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index bf6a2f504..e5d50e10c 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -14,12 +14,12 @@ #include "dxc/HLSL/HLMatrixType.h" #include "dxc/HLSL/HLOperations.h" #include "dxc/HLSL/HLModule.h" -#include "dxc/DXIL/DxilUtil.h" #include "dxc/HlslIntrinsicOp.h" #include "dxc/Support/Global.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilTypeSystem.h" #include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilUtil.h" #include "HLMatrixSubscriptUseReplacer.h" #include "llvm/IR/IRBuilder.h" @@ -141,6 +141,7 @@ private: void replaceAllUsesByLoweredValue(Instruction *MatInst, Value *VecVal); void replaceAllVariableUses(Value* MatPtr, Value* LoweredPtr); void replaceAllVariableUses(SmallVectorImpl &GEPIdxStack, Value *StackTopPtr, Value* LoweredPtr); + Value *translateScalarMatMul(Value *scalar, Value *mat, IRBuilder<> &Builder, bool isLhsScalar = true); void lowerGlobal(GlobalVariable *Global); Constant *lowerConstInitVal(Constant *Val); @@ -836,6 +837,35 @@ Value *HLMatrixLowerPass::lowerHLIntrinsic(CallInst *Call, IntrinsicOp Opcode) { LoweredRetTy, LoweredArgs, Builder); } +// Handles multiplication of a scalar with a matrix +Value *HLMatrixLowerPass::translateScalarMatMul(Value *Lhs, Value *Rhs, IRBuilder<> &Builder, bool isLhsScalar) { + Value *Mat = isLhsScalar ? Rhs : Lhs; + Value *Scalar = isLhsScalar ? Lhs : Rhs; + Value* LoweredMat = getLoweredByValOperand(Mat, Builder); + Type *ScalarTy = Scalar->getType(); + + // Perform the scalar-matrix multiplication! 
+ Type *ElemTy = LoweredMat->getType()->getVectorElementType(); + bool isIntMulOp = ScalarTy->isIntegerTy() && ElemTy->isIntegerTy(); + bool isFloatMulOp = ScalarTy->isFloatingPointTy() && ElemTy->isFloatingPointTy(); + DXASSERT(ScalarTy == ElemTy, "Scalar type must match the matrix component type."); + Value *Result = Builder.CreateVectorSplat(LoweredMat->getType()->getVectorNumElements(), Scalar); + + if (isFloatMulOp) { + // Preserve the order of operation for floats + Result = isLhsScalar ? Builder.CreateFMul(Result, LoweredMat) : Builder.CreateFMul(LoweredMat, Result); + } + else if (isIntMulOp) { + // Doesn't matter for integers but still preserve the order of operation + Result = isLhsScalar ? Builder.CreateMul(Result, LoweredMat) : Builder.CreateMul(LoweredMat, Result); + } + else { + DXASSERT(0, "Unknown type encountered when doing scalar-matrix multiplication."); + } + + return Result; +} + Value *HLMatrixLowerPass::lowerHLMulIntrinsic(Value* Lhs, Value *Rhs, bool Unsigned, IRBuilder<> &Builder) { HLMatrixType LhsMatTy = HLMatrixType::dyn_cast(Lhs->getType()); @@ -843,9 +873,16 @@ Value *HLMatrixLowerPass::lowerHLMulIntrinsic(Value* Lhs, Value *Rhs, Value* LoweredLhs = getLoweredByValOperand(Lhs, Builder); Value* LoweredRhs = getLoweredByValOperand(Rhs, Builder); + // Translate multiplication of scalar with matrix + bool isLhsScalar = !LoweredLhs->getType()->isVectorTy(); + bool isRhsScalar = !LoweredRhs->getType()->isVectorTy(); + bool isScalar = isLhsScalar || isRhsScalar; + if (isScalar) + return translateScalarMatMul(Lhs, Rhs, Builder, isLhsScalar); + DXASSERT(LoweredLhs->getType()->getScalarType() == LoweredRhs->getType()->getScalarType(), "Unexpected element type mismatch in mul intrinsic."); - DXASSERT(cast(LoweredLhs->getType()) && cast(LoweredLhs->getType()), + DXASSERT(cast(LoweredLhs->getType()) && cast(LoweredRhs->getType()), "Unexpected scalar in lowered matrix mul intrinsic operands."); Type* ElemTy = LoweredLhs->getType()->getScalarType(); 
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index c173e5358..44d16dd96 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -2064,30 +2064,17 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0, return dotOP; } -Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder) { +Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder, bool Unsigned = false) { + auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad; Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0); Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0); Value *Result = Builder.CreateMul(Elt0, Elt1); - switch (vecSize) { - case 4: - Elt0 = Builder.CreateExtractElement(arg0, 3); - Elt1 = Builder.CreateExtractElement(arg1, 3); - Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder); - // Pass thru. - case 3: - Elt0 = Builder.CreateExtractElement(arg0, 2); - Elt1 = Builder.CreateExtractElement(arg1, 2); - Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder); - // Pass thru. 
- case 2: - Elt0 = Builder.CreateExtractElement(arg0, 1); - Elt1 = Builder.CreateExtractElement(arg1, 1); - Result = TrivialDxilTrinaryOperation(DXIL::OpCode::IMad, Elt0, Elt1, Result, hlslOP, Builder); - break; - default: - case 1: - DXASSERT(vecSize == 1, "invalid vector size."); + for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) { + Elt0 = Builder.CreateExtractElement(arg0, iVecElt); + Elt1 = Builder.CreateExtractElement(arg1, iVecElt); + Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, Builder); } + return Result; } @@ -2641,7 +2628,7 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder); } else { - return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder); + return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul); } } else { @@ -5474,7 +5461,7 @@ IntrinsicLower gLowerTable[] = { { IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad}, { IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax}, { IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin }, - { IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul }, + { IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul }, { IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax }, { IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes }, { IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes }, diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul-vector-scalar.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul-vector-scalar.hlsl deleted file mode 100644 index 2610b9456..000000000 --- a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul-vector-scalar.hlsl +++ /dev/null @@ -1,50 +0,0 @@ -// RUN: %dxc -T vs_6_0 -E main -Od %s | FileCheck %s - -void main() { - - float3 fvec1 = { 0.1, 0.2, 0.3}; - float4 fvec2 = { 1.1, 1.2, 
1.3, 1.4}; - float fx1 = 0.5; - float fx2 = 1.5; - -// CHECK: call float @dx.op.dot3.f32 - float4 a = mul(fvec1, fvec2); - -// CHECK: fmul fast float -// CHECK: fmul fast float -// CHECK: fmul fast float - float3 b = mul(fvec1, fx1); - -// CHECK: fmul fast float -// CHECK: fmul fast float -// CHECK: fmul fast float - float3 c = mul(fx1, fvec1); - -// CHECK: fmul fast float - float d = mul(fx1, fx2); - - int4 ivec1 = { 1, 2, 3, 4}; - int3 ivec2 = { 4, 5, 6}; - int i1 = 1; - int i2 = 2; - -// CHECK: mul i32 -// CHECK: call i32 @dx.op.tertiary.i32(i32 48, -// CHECK: call i32 @dx.op.tertiary.i32(i32 48, - int e = mul(ivec1, ivec2); - -// CHECK: mul i32 -// CHECK: mul i32 -// CHECK: mul i32 -// CHECK: mul i32 - int4 f = mul(ivec1, i1); - -// CHECK: mul i32 -// CHECK: mul i32 -// CHECK: mul i32 -// CHECK: mul i32 - int4 g = mul(i1, ivec1); - -// CHECK: mul i32 - int h = mul(i1, i2); -} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul.hlsl new file mode 100644 index 000000000..b966c4f78 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul.hlsl @@ -0,0 +1,993 @@ +// This file contains tests covering all overloads of mul intrinsic +// as documented here: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-mul + +// TODO: While testing overloads of mul() intrinsics, found some incorrect codegen for bool type. +// TODO: Matrix-matrix multiplication of unsigned ints uses IMad instead of UMad. Update the correct opcode once issue #2482 is fixed. +// Add coverage for bool type once the issue #2467 is fixed. 
+ +// ***************************** +// float overloads +// ***************************** + +// vectors and scalars +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float4 -DELEM_TY2=float4 -DRET_TY=float %s | FileCheck %s -check-prefix=FL4_OVRLD +// FL4_OVRLD: call float @dx.op.dot4.f32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float3 -DELEM_TY2=float3 -DRET_TY=float %s | FileCheck %s -check-prefix=FL3_OVRLD +// FL3_OVRLD: call float @dx.op.dot3.f32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2 -DELEM_TY2=float2 -DRET_TY=float %s | FileCheck %s -check-prefix=FL2_OVRLD +// FL2_OVRLD: call float @dx.op.dot2.f32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float -DRET_TY=float %s | FileCheck %s -check-prefix=FL_OVRLD +// FL_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4 -DELEM_TY2=float4 -DRET_TY=float %s | FileCheck %s -check-prefix=FL4_OVRLD_OD +// FL4_OVRLD_OD: call float @dx.op.dot4.f32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float3 -DELEM_TY2=float3 -DRET_TY=float %s | FileCheck %s -check-prefix=FL3_OVRLD_OD +// FL3_OVRLD_OD: call float @dx.op.dot3.f32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2 -DELEM_TY2=float2 -DRET_TY=float %s | FileCheck %s -check-prefix=FL2_OVRLD_OD +// FL2_OVRLD_OD: call float @dx.op.dot2.f32 + +// matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2x4 -DELEM_TY2=float4x3 -DRET_TY=float2x3 %s | FileCheck %s -check-prefix=FLMAT1_OVRLD +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: fmul fast float + +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: fmul fast float + +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: 
call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: fmul fast float + +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float1x4 -DELEM_TY2=float4x1 -DRET_TY=float1x1 %s | FileCheck %s -check-prefix=FLMAT2_OVRLD +// FLMAT2_OVRLD: fmul fast float +// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2x4 -DELEM_TY2=float4x3 -DRET_TY=float2x3 %s | FileCheck %s -check-prefix=FLMAT1_OVRLD_OD +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: fmul fast float + +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: fmul fast float + +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: fmul fast float + +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT1_OVRLD_OD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float1x4 -DELEM_TY2=float4x1 -DRET_TY=float1x1 %s | FileCheck %s -check-prefix=FLMAT2_OVRLD_OD +// FLMAT2_OVRLD_OD: fmul fast float +// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 + +// mixed: scalar 
and vector +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL1_4_OVRLD +// FL1_4_OVRLD: fmul fast float +// FL1_4_OVRLD: fmul fast float +// FL1_4_OVRLD: fmul fast float +// FL1_4_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float3 -DELEM_TY2=float -DRET_TY=float3 %s | FileCheck %s -check-prefix=FL3_1_OVRLD +// FL3_1_OVRLD: fmul fast float +// FL3_1_OVRLD: fmul fast float +// FL3_1_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float -DELEM_TY2=float4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL1_4_OVRLD_OD +// FL1_4_OVRLD_OD: fmul fast float +// FL1_4_OVRLD_OD: fmul fast float +// FL1_4_OVRLD_OD: fmul fast float +// FL1_4_OVRLD_OD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float3 -DELEM_TY2=float -DRET_TY=float3 %s | FileCheck %s -check-prefix=FL3_1_OVRLD_OD +// FL3_1_OVRLD_OD: fmul fast float +// FL3_1_OVRLD_OD: fmul fast float +// FL3_1_OVRLD_OD: fmul fast float + +// mixed: scalar and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float2x4 -DRET_TY=float2x4 %s | FileCheck %s -check-prefix=FL1_MAT1_OVRLD +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float +// FL1_MAT1_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E 
main -DELEM_TY1=float4x3 -DELEM_TY2=float -DRET_TY=float4x3 %s | FileCheck %s -check-prefix=FL1_MAT2_OVRLD +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float +// FL1_MAT2_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float -DELEM_TY2=float2x4 -DRET_TY=float2x4 %s | FileCheck %s -check-prefix=FL1_MAT1_OVRLD_OD +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float +// 
FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float +// FL1_MAT1_OVRLD_OD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4x3 -DELEM_TY2=float -DRET_TY=float4x3 %s | FileCheck %s -check-prefix=FL1_MAT2_OVRLD_OD +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32 +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float +// FL1_MAT2_OVRLD_OD: fmul fast float + +// mixed: vector and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2 -DELEM_TY2=float2x4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL2_MAT1_OVRLD +// FL2_MAT1_OVRLD: fmul fast float +// FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD: fmul fast float +// FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD: fmul fast float +// 
FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float4x3 -DELEM_TY2=float3 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL3_MAT2_OVRLD +// FL3_MAT2_OVRLD: fmul fast float +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: fmul fast float +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: fmul fast float +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: fmul fast float +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2 -DELEM_TY2=float2x4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL2_MAT1_OVRLD_OD +// FL2_MAT1_OVRLD_OD: fmul fast float +// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD_OD: fmul fast float +// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD_OD: fmul fast float +// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL2_MAT1_OVRLD_OD: fmul fast float + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4x3 -DELEM_TY2=float3 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL3_MAT2_OVRLD_OD +// FL3_MAT2_OVRLD_OD: fmul fast float +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: fmul fast float +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: fmul fast float +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: fmul fast float +// 
FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 +// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46 + +// ***************************** +// int overloads +// ***************************** + +// vectors and scalars +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4 -DELEM_TY2=int4 -DRET_TY=int %s | FileCheck %s -check-prefix=IN4_OVRLD +// IN4_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int3 -DELEM_TY2=int3 -DRET_TY=int %s | FileCheck %s -check-prefix=IN3_OVRLD +// IN3_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2 -DELEM_TY2=int2 -DRET_TY=int %s | FileCheck %s -check-prefix=IN2_OVRLD +// IN2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int -DRET_TY=int %s | FileCheck %s -check-prefix=IN_OVRLD +// IN_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4 -DELEM_TY2=int4 -DRET_TY=int %s | FileCheck %s -check-prefix=IN4_OVRLD_OD +// IN4_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int3 -DELEM_TY2=int3 -DRET_TY=int %s | FileCheck %s -check-prefix=IN3_OVRLD_OD +// IN3_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2 -DELEM_TY2=int2 -DRET_TY=int %s | FileCheck %s -check-prefix=IN2_OVRLD_OD +// IN2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2x4 -DELEM_TY2=int4x3 -DRET_TY=int2x3 %s | FileCheck %s -check-prefix=INMAT1_OVRLD +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: mul i32 + +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: mul i32 + +// INMAT1_OVRLD: call i32 
@dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: mul i32 + +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int1x4 -DELEM_TY2=int4x1 -DRET_TY=int1x1 %s | FileCheck %s -check-prefix=INMAT2_OVRLD +// INMAT2_OVRLD: mul i32 +// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2x4 -DELEM_TY2=int4x3 -DRET_TY=int2x3 %s | FileCheck %s -check-prefix=INMAT1_OVRLD_OD +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: mul i32 + +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: mul i32 + +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: mul i32 + +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int1x4 -DELEM_TY2=int4x1 -DRET_TY=int1x1 %s | FileCheck %s -check-prefix=INMAT2_OVRLD_OD +// INMAT2_OVRLD_OD: mul i32 +// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// mixed: scalar and vector +// RUN: %dxc -T 
vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN1_4_OVRLD +// IN1_4_OVRLD: mul i32 +// IN1_4_OVRLD: mul i32 +// IN1_4_OVRLD: mul i32 +// IN1_4_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int3 -DELEM_TY2=int -DRET_TY=int3 %s | FileCheck %s -check-prefix=IN3_1_OVRLD +// IN3_1_OVRLD: mul i32 +// IN3_1_OVRLD: mul i32 +// IN3_1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int -DELEM_TY2=int4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN1_4_OVRLD_OD +// IN1_4_OVRLD_OD: mul i32 +// IN1_4_OVRLD_OD: mul i32 +// IN1_4_OVRLD_OD: mul i32 +// IN1_4_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int3 -DELEM_TY2=int -DRET_TY=int3 %s | FileCheck %s -check-prefix=IN3_1_OVRLD_OD +// IN3_1_OVRLD_OD: mul i32 +// IN3_1_OVRLD_OD: mul i32 +// IN3_1_OVRLD_OD: mul i32 + +// mixed: scalar and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int2x4 -DRET_TY=int2x4 %s | FileCheck %s -check-prefix=IN1_MAT1_OVRLD +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 +// IN1_MAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4x3 -DELEM_TY2=int -DRET_TY=int4x3 %s | FileCheck %s -check-prefix=IN1_MAT2_OVRLD +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: 
extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 +// IN1_MAT2_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int -DELEM_TY2=int2x4 -DRET_TY=int2x4 %s | FileCheck %s -check-prefix=IN1_MAT1_OVRLD_OD +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 +// IN1_MAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4x3 -DELEM_TY2=int -DRET_TY=int4x3 %s | FileCheck %s -check-prefix=IN1_MAT2_OVRLD_OD +// IN1_MAT2_OVRLD_OD: extractvalue 
%dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 +// IN1_MAT2_OVRLD_OD: mul i32 + +// mixed: vector and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2 -DELEM_TY2=int2x4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN2_MAT1_OVRLD +// IN2_MAT1_OVRLD: mul i32 +// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD: mul i32 +// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD: mul i32 +// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4x3 -DELEM_TY2=int3 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN3_MAT2_OVRLD +// IN3_MAT2_OVRLD: mul i32 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: mul i32 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: mul i32 +// IN3_MAT2_OVRLD: call i32 
@dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: mul i32 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2 -DELEM_TY2=int2x4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN2_MAT1_OVRLD_OD +// IN2_MAT1_OVRLD_OD: mul i32 +// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD_OD: mul i32 +// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD_OD: mul i32 +// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN2_MAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4x3 -DELEM_TY2=int3 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN3_MAT2_OVRLD_OD +// IN3_MAT2_OVRLD_OD: mul i32 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: mul i32 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: mul i32 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: mul i32 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// ***************************** +// uint overloads +// ***************************** + +// vectors and scalars +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4 -DELEM_TY2=uint4 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN4_OVRLD +// UIN4_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint3 -DELEM_TY2=uint3 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN3_OVRLD +// UIN3_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2 -DELEM_TY2=uint2 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN2_OVRLD +// UIN2_OVRLD: 
call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN_OVRLD +// UIN_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4 -DELEM_TY2=uint4 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN4_OVRLD_OD +// UIN4_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint3 -DELEM_TY2=uint3 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN3_OVRLD_OD +// UIN3_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2 -DELEM_TY2=uint2 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN2_OVRLD_OD +// UIN2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 + +// matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2x4 -DELEM_TY2=uint4x3 -DRET_TY=uint2x3 %s | FileCheck %s -check-prefix=UINMAT1_OVRLD +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: mul i32 + +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: mul i32 + +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: mul i32 + +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint1x4 -DELEM_TY2=uint4x1 -DRET_TY=uint1x1 %s | FileCheck %s -check-prefix=UINMAT2_OVRLD +// UINMAT2_OVRLD: mul i32 +// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48 + +// RUN: 
%dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2x4 -DELEM_TY2=uint4x3 -DRET_TY=uint2x3 %s | FileCheck %s -check-prefix=UINMAT1_OVRLD_OD +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: mul i32 + +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: mul i32 + +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: mul i32 + +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint1x4 -DELEM_TY2=uint4x1 -DRET_TY=uint1x1 %s | FileCheck %s -check-prefix=UINMAT2_OVRLD_OD +// UINMAT2_OVRLD_OD: mul i32 +// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 +// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48 + +// mixed: scalar and vector +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN1_4_OVRLD +// UIN1_4_OVRLD: mul i32 +// UIN1_4_OVRLD: mul i32 +// UIN1_4_OVRLD: mul i32 +// UIN1_4_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint3 -DELEM_TY2=uint -DRET_TY=uint3 %s | FileCheck %s -check-prefix=UIN3_1_OVRLD +// UIN3_1_OVRLD: mul i32 +// UIN3_1_OVRLD: mul i32 +// UIN3_1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint -DELEM_TY2=uint4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN1_4_OVRLD_OD +// UIN1_4_OVRLD_OD: mul i32 +// UIN1_4_OVRLD_OD: mul i32 +// UIN1_4_OVRLD_OD: mul i32 +// 
UIN1_4_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint3 -DELEM_TY2=uint -DRET_TY=uint3 %s | FileCheck %s -check-prefix=UIN3_1_OVRLD_OD +// UIN3_1_OVRLD_OD: mul i32 +// UIN3_1_OVRLD_OD: mul i32 +// UIN3_1_OVRLD_OD: mul i32 + +// mixed: scalar and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint2x4 -DRET_TY=uint2x4 %s | FileCheck %s -check-prefix=UIN1_MAT1_OVRLD +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 +// UIN1_MAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4x3 -DELEM_TY2=uint -DRET_TY=uint4x3 %s | FileCheck %s -check-prefix=UIN1_MAT2_OVRLD +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: extractvalue 
%dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 +// UIN1_MAT2_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint -DELEM_TY2=uint2x4 -DRET_TY=uint2x4 %s | FileCheck %s -check-prefix=UIN1_MAT1_OVRLD_OD +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 +// UIN1_MAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4x3 -DELEM_TY2=uint -DRET_TY=uint4x3 %s | FileCheck %s -check-prefix=UIN1_MAT2_OVRLD_OD +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD:
extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 +// UIN1_MAT2_OVRLD_OD: mul i32 + +// mixed: vector and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2 -DELEM_TY2=uint2x4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN2_MAT1_OVRLD +// UIN2_MAT1_OVRLD: mul i32 +// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN2_MAT1_OVRLD: mul i32 +// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN2_MAT1_OVRLD: mul i32 +// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN2_MAT1_OVRLD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4x3 -DELEM_TY2=uint3 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN3_MAT2_OVRLD +// UIN3_MAT2_OVRLD: mul i32 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: mul i32 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: mul i32 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: mul i32 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2 -DELEM_TY2=uint2x4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN2_MAT1_OVRLD_OD +// UIN2_MAT1_OVRLD_OD: mul i32 +// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// 
UIN2_MAT1_OVRLD_OD: mul i32 +// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN2_MAT1_OVRLD_OD: mul i32 +// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN2_MAT1_OVRLD_OD: mul i32 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4x3 -DELEM_TY2=uint3 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN3_MAT2_OVRLD_OD +// UIN3_MAT2_OVRLD_OD: mul i32 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: mul i32 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: mul i32 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: mul i32 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 +// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49 + + +// ***************************** +// min16float overloads +// ***************************** + +// vectors and scalars +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4 -DELEM_TY2=min16float4 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL4_OVRLD +// MNFL4_OVRLD: call half @dx.op.dot4.f16(i32 56 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float3 -DELEM_TY2=min16float3 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL3_OVRLD +// MNFL3_OVRLD: call half @dx.op.dot3.f16 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2 -DELEM_TY2=min16float2 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL2_OVRLD +// MNFL2_OVRLD: call half @dx.op.dot2.f16 + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL_OVRLD +// MNFL_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4 -DELEM_TY2=min16float4 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL4_OVRLD_OD +// MNFL4_OVRLD_OD: call 
half @dx.op.dot4.f16(i32 56 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float3 -DELEM_TY2=min16float3 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL3_OVRLD_OD +// MNFL3_OVRLD_OD: call half @dx.op.dot3.f16 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2 -DELEM_TY2=min16float2 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL2_OVRLD_OD +// MNFL2_OVRLD_OD: call half @dx.op.dot2.f16 + +// matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2x4 -DELEM_TY2=min16float4x3 -DRET_TY=min16float2x3 %s | FileCheck %s -check-prefix=MNFLMAT1_OVRLD +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: fmul fast half + +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: fmul fast half + +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: fmul fast half + +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float1x4 -DELEM_TY2=min16float4x1 -DRET_TY=min16float1x1 %s | FileCheck %s -check-prefix=MNFLMAT2_OVRLD +// MNFLMAT2_OVRLD: fmul fast half +// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2x4 -DELEM_TY2=min16float4x3 -DRET_TY=min16float2x3 %s | FileCheck %s -check-prefix=MNFLMAT1_OVRLD_OD +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 
+// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: fmul fast half + +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: fmul fast half + +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: fmul fast half + +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT1_OVRLD_OD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float1x4 -DELEM_TY2=min16float4x1 -DRET_TY=min16float1x1 %s | FileCheck %s -check-prefix=MNFLMAT2_OVRLD_OD +// MNFLMAT2_OVRLD_OD: fmul fast half +// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 + +// mixed: scalar and vector +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL1_4_OVRLD +// MNFL1_4_OVRLD: fmul fast half +// MNFL1_4_OVRLD: fmul fast half +// MNFL1_4_OVRLD: fmul fast half +// MNFL1_4_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float3 -DELEM_TY2=min16float -DRET_TY=min16float3 %s | FileCheck %s -check-prefix=MNFL3_1_OVRLD +// MNFL3_1_OVRLD: fmul fast half +// MNFL3_1_OVRLD: fmul fast half +// MNFL3_1_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float -DELEM_TY2=min16float4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL1_4_OVRLD_OD +// MNFL1_4_OVRLD_OD: fmul fast half +// MNFL1_4_OVRLD_OD: fmul fast half +// 
MNFL1_4_OVRLD_OD: fmul fast half +// MNFL1_4_OVRLD_OD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float3 -DELEM_TY2=min16float -DRET_TY=min16float3 %s | FileCheck %s -check-prefix=MNFL3_1_OVRLD_OD +// MNFL3_1_OVRLD_OD: fmul fast half +// MNFL3_1_OVRLD_OD: fmul fast half +// MNFL3_1_OVRLD_OD: fmul fast half + +// mixed: scalar and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float2x4 -DRET_TY=min16float2x4 %s | FileCheck %s -check-prefix=MNFL1_MAT1_OVRLD +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half +// MNFL1_MAT1_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float -DRET_TY=min16float4x3 %s | FileCheck %s -check-prefix=MNFL1_MAT2_OVRLD +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// 
MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half +// MNFL1_MAT2_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float -DELEM_TY2=min16float2x4 -DRET_TY=min16float2x4 %s | FileCheck %s -check-prefix=MNFL1_MAT1_OVRLD_OD +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half +// MNFL1_MAT1_OVRLD_OD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float -DRET_TY=min16float4x3 %s | FileCheck %s -check-prefix=MNFL1_MAT2_OVRLD_OD +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD:
extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16 +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half +// MNFL1_MAT2_OVRLD_OD: fmul fast half + +// mixed: vector and matrix +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2 -DELEM_TY2=min16float2x4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL2_MAT1_OVRLD +// MNFL2_MAT1_OVRLD: fmul fast half +// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD: fmul fast half +// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD: fmul fast half +// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float3 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL3_MAT2_OVRLD +// MNFL3_MAT2_OVRLD: fmul fast half +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: fmul fast half +// MNFL3_MAT2_OVRLD: call half 
@dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: fmul fast half +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: fmul fast half +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46 + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2 -DELEM_TY2=min16float2x4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL2_MAT1_OVRLD_OD +// MNFL2_MAT1_OVRLD_OD: fmul fast half +// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD_OD: fmul fast half +// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD_OD: fmul fast half +// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL2_MAT1_OVRLD_OD: fmul fast half + +// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float3 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL3_MAT2_OVRLD_OD +// MNFL3_MAT2_OVRLD_OD: fmul fast half +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: fmul fast half +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: fmul fast half +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: fmul fast half +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 +// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46 + +cbuffer CB { + ELEM_TY1 e1; + ELEM_TY2 e2; +}; + +RET_TY main(): OUT +{ + return mul(e1, e2); +} \ No newline at end of file diff --git a/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_literals.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_literals.hlsl 
new file mode 100644 index 000000000..0c440367b --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/intrinsics/mul/mul_literals.hlsl @@ -0,0 +1,167 @@ +// This file contains tests covering all overloads of mul intrinsic +// as documented here: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-mul + +// RUN: %dxc -T vs_6_0 -E main %s | FileCheck %s + +struct DS { + float f_1; + float f_2; + float f_3; + float f_4; + float2 f2_1; + float2 f2_2; + float3 f3_1; + float3 f3_2; + float4 f4_1; + float4 f4_2; + float2x3 fm_1; + float3x4 fm_2; + float2x4 fm_3; + + int i_1; + int i_2; + int i_3; + int i_4; + int2 i2_1; + int2 i2_2; + int3 i3_1; + int3 i3_2; + int4 i4_1; + int4 i4_2; + int2x3 im_1; + int3x4 im_2; + int2x4 im_3; + + uint u_1; + uint u_2; + uint u_3; + uint u_4; + uint2 u2_1; + uint2 u2_2; + uint3 u3_1; + uint3 u3_2; + uint4 u4_1; + uint4 u4_2; + uint2x3 um_1; + uint3x4 um_2; + uint2x4 um_3; +}; + +RWStructuredBuffer<DS> SB; + +void main() +{ + //*************** + // float overloads + //*************** + + // scalar-scalar + // CHECK: float 1.000000e+01 + SB[0].f_1 = mul(2, 5); + + // vector-vector + // CHECK: float 1.100000e+01 + SB[0].f_2 = mul(float2(1, 2), float2(3, 4)); + + // CHECK: float 2.300000e+01 + SB[0].f_3 = mul(float3(1, 2, 3), float3(3, 4, 4)); + + // CHECK: float 3.200000e+01 + SB[0].f_4 = mul(float4(1, 2, 2, 1), float4(3, 4, 8, 5)); + + // scalar-vector + // CHECK: float 2.000000e+00, float 6.000000e+00 + SB[0].f2_1 = mul(2, float2(1, 3)); + + // CHECK: float 3.000000e+00, float 9.000000e+00, float 0.000000e+00 + SB[0].f3_1 = mul(float3(1, 3, 0), 3); + + // CHECK: float 4.000000e+01, float 3.000000e+01, float 1.000000e+01, float 3.000000e+01 + SB[0].f4_1 = mul(10, float4(4, 3, 1, 3)); + + float2x3 m1 = {1, 2, 3, 4, 5, 6}; + float3x4 m2 = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; + float2x4 m3 = {2, 3, 4, 5, 6, 7, 8, 9}; + + // scalar-matrix + // CHECK: float 2.000000e+00, float 8.000000e+00, float 4.000000e+00,
float 1.000000e+01 + // CHECK: float 6.000000e+00, float 1.200000e+01 + SB[0].fm_1 = mul(2, m1); + + // CHECK: float 1.330000e+02, float 2.090000e+02, float 2.850000e+02, float 1.520000e+02 + // CHECK: float 2.280000e+02, float 3.040000e+02, float 1.710000e+02, float 2.470000e+02 + // CHECK: float 3.230000e+02, float 1.900000e+02, float 2.660000e+02, float 3.420000e+02 + SB[0].fm_2 = mul(m2, 19); + + // matrix-matrix + // CHECK: float 7.400000e+01, float 1.730000e+02, float 8.000000e+01, float 1.880000e+02 + // CHECK: float 8.600000e+01, float 2.030000e+02, float 9.200000e+01, float 2.180000e+02 + SB[0].fm_3 = mul(m1, m2); + + // vector-matrix + // CHECK: float 1.400000e+01, float 3.200000e+01 + SB[0].f2_2 = mul(m1, float3(1, 2, 3)); + + // CHECK: float 9.000000e+00, float 1.200000e+01, float 1.500000e+01 + SB[0].f3_2 = mul(float2(1, 2), m1); + + // CHECK: float 1.400000e+01, float 1.700000e+01, float 2.000000e+01, float 2.300000e+01 + SB[0].f4_2 = mul(float2(1, 2), m3); + + //*************** + // int overloads + //*************** + + // scalar-scalar + // CHECK: i32 -10 + SB[0].i_1 = mul(2, -5); + + // vector-vector + // CHECK: i32 -5 + SB[0].i_2 = mul(int2(1, -2), int2(3, 4)); + + // CHECK: i32 7 + SB[0].i_3 = mul(int3(1, 2, 3), int3(3, -4, 4)); + + // CHECK: i32 -16 + SB[0].i_4 = mul(int4(1, -2, 2, 1), int4(3, 4, -8, 5)); + + // scalar-vector + // CHECK: i32 2, i32 -6 + SB[0].i2_1 = mul(2, int2(1, -3)); + + // CHECK: i32 3, i32 -9, i32 0 + SB[0].i3_1 = mul(int3(1, -3, 0), 3); + + // CHECK: i32 40, i32 30, i32 10, i32 30 + SB[0].i4_1 = mul(10, int4(4, 3, 1, 3)); + + int2x3 im1 = {1, 2, -3, 4, -5, 6}; + int3x4 im2 = {7, 8, 9, 10, -11, 12, 13, 14, -15, 16, -17, 18}; + int2x4 im3 = {2, 3, 4, -5, 6, 7, -8, -9}; + + // scalar-matrix + // CHECK: i32 -2, i32 -8, i32 -4, i32 10 + // CHECK: i32 6, i32 -12 + SB[0].im_1 = mul(-2, im1); + + // CHECK: i32 133, i32 -209, i32 -285, i32 152 + // CHECK: i32 228, i32 304, i32 171, i32 247 + // CHECK: i32 -323, i32 190, i32 266, 
i32 342 + SB[0].im_2 = mul(im2, 19); + + // matrix-matrix + // CHECK: i32 30, i32 -7, i32 -16, i32 68 + // CHECK: i32 86, i32 -131, i32 -16, i32 78 + SB[0].im_3 = mul(im1, im2); + + // vector-matrix + // CHECK: i32 -12, i32 32 + SB[0].i2_2 = mul(im1, int3(1, -2, 3)); + + // CHECK: i32 9, i32 -8, i32 9 + SB[0].i3_2 = mul(int2(1, 2), im1); + + // CHECK: i32 10, i32 11, i32 -20, i32 -13 + SB[0].i4_2 = mul(int2(-1, 2), im3); +} \ No newline at end of file diff --git a/tools/clang/unittests/HLSL/FileCheckerTest.cpp b/tools/clang/unittests/HLSL/FileCheckerTest.cpp index 289e3eab0..7ed115f55 100644 --- a/tools/clang/unittests/HLSL/FileCheckerTest.cpp +++ b/tools/clang/unittests/HLSL/FileCheckerTest.cpp @@ -722,6 +722,9 @@ public: // Iterate over all RUN lines for (auto &cmd : cmds) { RunFileCheckFromCommands(cmd.c_str(), fileName); + // If any RUN command fails, skip the remaining commands + // and report the error. + if (this->RunResult != 0) break; } }