Fix compilation failures involving mul intrinsic overloads (#2470)

This commit is contained in:
Vishal Sharma 2019-09-23 12:46:57 -07:00 коммит произвёл GitHub
Родитель f662b34f51
Коммит da9a66f768
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 1211 добавлений и 74 удалений

Просмотреть файл

@ -14,12 +14,12 @@
#include "dxc/HLSL/HLMatrixType.h"
#include "dxc/HLSL/HLOperations.h"
#include "dxc/HLSL/HLModule.h"
#include "dxc/DXIL/DxilUtil.h"
#include "dxc/HlslIntrinsicOp.h"
#include "dxc/Support/Global.h"
#include "dxc/DXIL/DxilOperations.h"
#include "dxc/DXIL/DxilTypeSystem.h"
#include "dxc/DXIL/DxilModule.h"
#include "dxc/DXIL/DxilUtil.h"
#include "HLMatrixSubscriptUseReplacer.h"
#include "llvm/IR/IRBuilder.h"
@ -141,6 +141,7 @@ private:
void replaceAllUsesByLoweredValue(Instruction *MatInst, Value *VecVal);
void replaceAllVariableUses(Value* MatPtr, Value* LoweredPtr);
void replaceAllVariableUses(SmallVectorImpl<Value*> &GEPIdxStack, Value *StackTopPtr, Value* LoweredPtr);
// Lowers a mul() between a scalar and a matrix. Lhs and Rhs are the mul
// operands in source order; isLhsScalar tells which of the two is the scalar
// (true: Lhs is the scalar and Rhs the matrix, false: the other way round).
Value *translateScalarMatMul(Value *Lhs, Value *Rhs, IRBuilder<> &Builder, bool isLhsScalar = true);
void lowerGlobal(GlobalVariable *Global);
Constant *lowerConstInitVal(Constant *Val);
@ -836,6 +837,35 @@ Value *HLMatrixLowerPass::lowerHLIntrinsic(CallInst *Call, IntrinsicOp Opcode) {
LoweredRetTy, LoweredArgs, Builder);
}
// Handles multiplication of a scalar with a matrix, in either operand order.
//   Lhs, Rhs     - the mul operands in source order.
//   Builder      - IR builder that receives the emitted instructions.
//   isLhsScalar  - true when Lhs is the scalar operand and Rhs is the matrix.
// Returns the element-wise product of the splatted scalar and the lowered
// matrix vector, with the operands kept in their source order.
Value *HLMatrixLowerPass::translateScalarMatMul(Value *Lhs, Value *Rhs, IRBuilder<> &Builder, bool isLhsScalar) {
  // Sort the two operands into their roles.
  Value *ScalarVal;
  Value *MatVal;
  if (isLhsScalar) {
    ScalarVal = Lhs;
    MatVal = Rhs;
  } else {
    ScalarVal = Rhs;
    MatVal = Lhs;
  }

  // The matrix is multiplied in its lowered (vector) form.
  Value *MatVec = getLoweredByValOperand(MatVal, Builder);
  Type *MatVecTy = MatVec->getType();
  Type *CompTy = MatVecTy->getVectorElementType();
  Type *ScalarTy = ScalarVal->getType();
  DXASSERT(ScalarTy == CompTy, "Scalar type must match the matrix component type.");

  // Broadcast the scalar to one lane per matrix component.
  Value *Splat = Builder.CreateVectorSplat(MatVecTy->getVectorNumElements(), ScalarVal);

  // Keep the operands in source order. This matters for floats; it makes no
  // difference for integers but is kept the same for uniformity.
  Value *MulLhs = isLhsScalar ? Splat : MatVec;
  Value *MulRhs = isLhsScalar ? MatVec : Splat;

  if (ScalarTy->isFloatingPointTy() && CompTy->isFloatingPointTy())
    return Builder.CreateFMul(MulLhs, MulRhs);
  if (ScalarTy->isIntegerTy() && CompTy->isIntegerTy())
    return Builder.CreateMul(MulLhs, MulRhs);

  // Neither a float nor an integer multiply applies. If the assert does not
  // stop execution, the splat is handed back unmultiplied.
  DXASSERT(0, "Unknown type encountered when doing scalar-matrix multiplication.");
  return Splat;
}
Value *HLMatrixLowerPass::lowerHLMulIntrinsic(Value* Lhs, Value *Rhs,
bool Unsigned, IRBuilder<> &Builder) {
HLMatrixType LhsMatTy = HLMatrixType::dyn_cast(Lhs->getType());
@ -843,9 +873,16 @@ Value *HLMatrixLowerPass::lowerHLMulIntrinsic(Value* Lhs, Value *Rhs,
Value* LoweredLhs = getLoweredByValOperand(Lhs, Builder);
Value* LoweredRhs = getLoweredByValOperand(Rhs, Builder);
// Translate multiplication of scalar with matrix
bool isLhsScalar = !LoweredLhs->getType()->isVectorTy();
bool isRhsScalar = !LoweredRhs->getType()->isVectorTy();
bool isScalar = isLhsScalar || isRhsScalar;
if (isScalar)
return translateScalarMatMul(Lhs, Rhs, Builder, isLhsScalar);
DXASSERT(LoweredLhs->getType()->getScalarType() == LoweredRhs->getType()->getScalarType(),
"Unexpected element type mismatch in mul intrinsic.");
DXASSERT(cast<VectorType>(LoweredLhs->getType()) && cast<VectorType>(LoweredLhs->getType()),
DXASSERT(cast<VectorType>(LoweredLhs->getType()) && cast<VectorType>(LoweredRhs->getType()),
"Unexpected scalar in lowered matrix mul intrinsic operands.");
Type* ElemTy = LoweredLhs->getType()->getScalarType();

Просмотреть файл

@ -2064,30 +2064,17 @@ Value *TrivialDotOperation(OP::OpCode opcode, Value *src0,
return dotOP;
}
// Computes an integer dot product of the first vecSize elements of arg0 and
// arg1 as one multiply followed by a chain of multiply-adds.
//   arg0, arg1 - vector operands; elements 0..vecSize-1 are read from each.
//   vecSize    - number of elements to accumulate; must be at least 1.
//   hlslOP     - DXIL op helper forwarded to TrivialDxilTrinaryOperation.
//   Builder    - IR builder that receives the emitted instructions.
//   Unsigned   - when true the accumulation uses UMad instead of IMad.
// Returns the accumulated scalar value.
Value *TranslateIDot(Value *arg0, Value *arg1, unsigned vecSize, hlsl::OP *hlslOP, IRBuilder<> &Builder, bool Unsigned = false) {
  // Element 0 is always read below, so an empty vector is never valid.
  DXASSERT(vecSize >= 1, "invalid vector size.");
  auto madOpCode = Unsigned ? DXIL::OpCode::UMad : DXIL::OpCode::IMad;

  // Seed the accumulator with the product of the first elements.
  Value *Elt0 = Builder.CreateExtractElement(arg0, (uint64_t)0);
  Value *Elt1 = Builder.CreateExtractElement(arg1, (uint64_t)0);
  Value *Result = Builder.CreateMul(Elt0, Elt1);

  // Fold each remaining element pair in with a multiply-add.
  for (unsigned iVecElt = 1; iVecElt < vecSize; ++iVecElt) {
    Elt0 = Builder.CreateExtractElement(arg0, iVecElt);
    Elt1 = Builder.CreateExtractElement(arg1, iVecElt);
    Result = TrivialDxilTrinaryOperation(madOpCode, Elt0, Elt1, Result, hlslOP, Builder);
  }
  return Result;
}
@ -2641,7 +2628,7 @@ Value *TranslateMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
return TranslateFDot(arg0, arg1, vecSize, hlslOP, Builder);
}
else {
return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder);
return TranslateIDot(arg0, arg1, vecSize, hlslOP, Builder, IOP == IntrinsicOp::IOP_umul);
}
}
else {
@ -5474,7 +5461,7 @@ IntrinsicLower gLowerTable[] = {
{ IntrinsicOp::IOP_umad, TranslateFUITrinary, DXIL::OpCode::UMad},
{ IntrinsicOp::IOP_umax, TranslateFUIBinary, DXIL::OpCode::UMax},
{ IntrinsicOp::IOP_umin, TranslateFUIBinary, DXIL::OpCode::UMin },
{ IntrinsicOp::IOP_umul, TranslateFUIBinary, DXIL::OpCode::UMul },
{ IntrinsicOp::IOP_umul, TranslateMul, DXIL::OpCode::UMul },
{ IntrinsicOp::IOP_usign, TranslateUSign, DXIL::OpCode::UMax },
{ IntrinsicOp::MOP_InterlockedUMax, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },
{ IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes },

Просмотреть файл

@ -1,50 +0,0 @@
// RUN: %dxc -T vs_6_0 -E main -Od %s | FileCheck %s
// Exercises mul() with float and int operands in vector/vector,
// vector/scalar, scalar/vector and scalar/scalar combinations. The FileCheck
// directives placed before each call list the operations expected in the
// compiler output for that call.
void main() {
// Float operands.
float3 fvec1 = { 0.1, 0.2, 0.3};
float4 fvec2 = { 1.1, 1.2, 1.3, 1.4};
float fx1 = 0.5;
float fx2 = 1.5;
// NOTE(review): fvec1 is a float3 while fvec2 is a float4, and the result is
// stored in a float4; a dot3 is expected, so presumably fvec2 is truncated to
// three components - confirm.
// CHECK: call float @dx.op.dot3.f32
float4 a = mul(fvec1, fvec2);
// Vector times scalar: one multiply per component is expected.
// CHECK: fmul fast float
// CHECK: fmul fast float
// CHECK: fmul fast float
float3 b = mul(fvec1, fx1);
// Scalar times vector: same expectation with the operands swapped.
// CHECK: fmul fast float
// CHECK: fmul fast float
// CHECK: fmul fast float
float3 c = mul(fx1, fvec1);
// Scalar times scalar: a single multiply.
// CHECK: fmul fast float
float d = mul(fx1, fx2);
// Integer operands.
int4 ivec1 = { 1, 2, 3, 4};
int3 ivec2 = { 4, 5, 6};
int i1 = 1;
int i2 = 2;
// NOTE(review): ivec1 is an int4 while ivec2 is an int3; one mul plus two
// tertiary ops (opcode 48, presumably IMad) are expected, i.e. a
// three-component dot - presumably ivec1 is truncated; confirm.
// CHECK: mul i32
// CHECK: call i32 @dx.op.tertiary.i32(i32 48,
// CHECK: call i32 @dx.op.tertiary.i32(i32 48,
int e = mul(ivec1, ivec2);
// Vector times scalar: one multiply per component is expected.
// CHECK: mul i32
// CHECK: mul i32
// CHECK: mul i32
// CHECK: mul i32
int4 f = mul(ivec1, i1);
// Scalar times vector: same expectation with the operands swapped.
// CHECK: mul i32
// CHECK: mul i32
// CHECK: mul i32
// CHECK: mul i32
int4 g = mul(i1, ivec1);
// Scalar times scalar: a single multiply.
// CHECK: mul i32
int h = mul(i1, i2);
}

Просмотреть файл

@ -0,0 +1,993 @@
// This file contains tests covering all overloads of mul intrinsic
// as documented here: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-mul
// TODO: While testing the overloads of the mul() intrinsic, incorrect codegen was found for the bool type (see issue #2467).
// TODO: Matrix-matrix multiplication of unsigned ints uses IMad instead of UMad. Update the expected opcode once issue #2482 is fixed.
// TODO: Add coverage for the bool type once issue #2467 is fixed.
// *****************************
// float overloads
// *****************************
// vectors and scalars
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float4 -DELEM_TY2=float4 -DRET_TY=float %s | FileCheck %s -check-prefix=FL4_OVRLD
// FL4_OVRLD: call float @dx.op.dot4.f32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float3 -DELEM_TY2=float3 -DRET_TY=float %s | FileCheck %s -check-prefix=FL3_OVRLD
// FL3_OVRLD: call float @dx.op.dot3.f32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2 -DELEM_TY2=float2 -DRET_TY=float %s | FileCheck %s -check-prefix=FL2_OVRLD
// FL2_OVRLD: call float @dx.op.dot2.f32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float -DRET_TY=float %s | FileCheck %s -check-prefix=FL_OVRLD
// FL_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4 -DELEM_TY2=float4 -DRET_TY=float %s | FileCheck %s -check-prefix=FL4_OVRLD_OD
// FL4_OVRLD_OD: call float @dx.op.dot4.f32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float3 -DELEM_TY2=float3 -DRET_TY=float %s | FileCheck %s -check-prefix=FL3_OVRLD_OD
// FL3_OVRLD_OD: call float @dx.op.dot3.f32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2 -DELEM_TY2=float2 -DRET_TY=float %s | FileCheck %s -check-prefix=FL2_OVRLD_OD
// FL2_OVRLD_OD: call float @dx.op.dot2.f32
// matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2x4 -DELEM_TY2=float4x3 -DRET_TY=float2x3 %s | FileCheck %s -check-prefix=FLMAT1_OVRLD
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: fmul fast float
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: fmul fast float
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: fmul fast float
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float1x4 -DELEM_TY2=float4x1 -DRET_TY=float1x1 %s | FileCheck %s -check-prefix=FLMAT2_OVRLD
// FLMAT2_OVRLD: fmul fast float
// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FLMAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2x4 -DELEM_TY2=float4x3 -DRET_TY=float2x3 %s | FileCheck %s -check-prefix=FLMAT1_OVRLD_OD
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: fmul fast float
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: fmul fast float
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: fmul fast float
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT1_OVRLD_OD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float1x4 -DELEM_TY2=float4x1 -DRET_TY=float1x1 %s | FileCheck %s -check-prefix=FLMAT2_OVRLD_OD
// FLMAT2_OVRLD_OD: fmul fast float
// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FLMAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// mixed: scalar and vector
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL1_4_OVRLD
// FL1_4_OVRLD: fmul fast float
// FL1_4_OVRLD: fmul fast float
// FL1_4_OVRLD: fmul fast float
// FL1_4_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float3 -DELEM_TY2=float -DRET_TY=float3 %s | FileCheck %s -check-prefix=FL3_1_OVRLD
// FL3_1_OVRLD: fmul fast float
// FL3_1_OVRLD: fmul fast float
// FL3_1_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float -DELEM_TY2=float4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL1_4_OVRLD_OD
// FL1_4_OVRLD_OD: fmul fast float
// FL1_4_OVRLD_OD: fmul fast float
// FL1_4_OVRLD_OD: fmul fast float
// FL1_4_OVRLD_OD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float3 -DELEM_TY2=float -DRET_TY=float3 %s | FileCheck %s -check-prefix=FL3_1_OVRLD_OD
// FL3_1_OVRLD_OD: fmul fast float
// FL3_1_OVRLD_OD: fmul fast float
// FL3_1_OVRLD_OD: fmul fast float
// mixed: scalar and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float -DELEM_TY2=float2x4 -DRET_TY=float2x4 %s | FileCheck %s -check-prefix=FL1_MAT1_OVRLD
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// FL1_MAT1_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float4x3 -DELEM_TY2=float -DRET_TY=float4x3 %s | FileCheck %s -check-prefix=FL1_MAT2_OVRLD
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// FL1_MAT2_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float -DELEM_TY2=float2x4 -DRET_TY=float2x4 %s | FileCheck %s -check-prefix=FL1_MAT1_OVRLD_OD
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// FL1_MAT1_OVRLD_OD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4x3 -DELEM_TY2=float -DRET_TY=float4x3 %s | FileCheck %s -check-prefix=FL1_MAT2_OVRLD_OD
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f32
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// FL1_MAT2_OVRLD_OD: fmul fast float
// mixed: vector and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float2 -DELEM_TY2=float2x4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL2_MAT1_OVRLD
// FL2_MAT1_OVRLD: fmul fast float
// FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD: fmul fast float
// FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD: fmul fast float
// FL2_MAT1_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=float4x3 -DELEM_TY2=float3 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL3_MAT2_OVRLD
// FL3_MAT2_OVRLD: fmul fast float
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: fmul fast float
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: fmul fast float
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: fmul fast float
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD: call float @dx.op.tertiary.f32(i32 46
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float2 -DELEM_TY2=float2x4 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL2_MAT1_OVRLD_OD
// FL2_MAT1_OVRLD_OD: fmul fast float
// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD_OD: fmul fast float
// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD_OD: fmul fast float
// FL2_MAT1_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL2_MAT1_OVRLD_OD: fmul fast float
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=float4x3 -DELEM_TY2=float3 -DRET_TY=float4 %s | FileCheck %s -check-prefix=FL3_MAT2_OVRLD_OD
// FL3_MAT2_OVRLD_OD: fmul fast float
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: fmul fast float
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: fmul fast float
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: fmul fast float
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// FL3_MAT2_OVRLD_OD: call float @dx.op.tertiary.f32(i32 46
// *****************************
// int overloads
// *****************************
// vectors and scalars
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4 -DELEM_TY2=int4 -DRET_TY=int %s | FileCheck %s -check-prefix=IN4_OVRLD
// IN4_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int3 -DELEM_TY2=int3 -DRET_TY=int %s | FileCheck %s -check-prefix=IN3_OVRLD
// IN3_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2 -DELEM_TY2=int2 -DRET_TY=int %s | FileCheck %s -check-prefix=IN2_OVRLD
// IN2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int -DRET_TY=int %s | FileCheck %s -check-prefix=IN_OVRLD
// IN_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4 -DELEM_TY2=int4 -DRET_TY=int %s | FileCheck %s -check-prefix=IN4_OVRLD_OD
// IN4_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int3 -DELEM_TY2=int3 -DRET_TY=int %s | FileCheck %s -check-prefix=IN3_OVRLD_OD
// IN3_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2 -DELEM_TY2=int2 -DRET_TY=int %s | FileCheck %s -check-prefix=IN2_OVRLD_OD
// IN2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2x4 -DELEM_TY2=int4x3 -DRET_TY=int2x3 %s | FileCheck %s -check-prefix=INMAT1_OVRLD
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: mul i32
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: mul i32
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: mul i32
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int1x4 -DELEM_TY2=int4x1 -DRET_TY=int1x1 %s | FileCheck %s -check-prefix=INMAT2_OVRLD
// INMAT2_OVRLD: mul i32
// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2x4 -DELEM_TY2=int4x3 -DRET_TY=int2x3 %s | FileCheck %s -check-prefix=INMAT1_OVRLD_OD
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: mul i32
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: mul i32
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: mul i32
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int1x4 -DELEM_TY2=int4x1 -DRET_TY=int1x1 %s | FileCheck %s -check-prefix=INMAT2_OVRLD_OD
// INMAT2_OVRLD_OD: mul i32
// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// INMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// mixed: scalar and vector
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN1_4_OVRLD
// IN1_4_OVRLD: mul i32
// IN1_4_OVRLD: mul i32
// IN1_4_OVRLD: mul i32
// IN1_4_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int3 -DELEM_TY2=int -DRET_TY=int3 %s | FileCheck %s -check-prefix=IN3_1_OVRLD
// IN3_1_OVRLD: mul i32
// IN3_1_OVRLD: mul i32
// IN3_1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int -DELEM_TY2=int4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN1_4_OVRLD_OD
// IN1_4_OVRLD_OD: mul i32
// IN1_4_OVRLD_OD: mul i32
// IN1_4_OVRLD_OD: mul i32
// IN1_4_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int3 -DELEM_TY2=int -DRET_TY=int3 %s | FileCheck %s -check-prefix=IN3_1_OVRLD_OD
// IN3_1_OVRLD_OD: mul i32
// IN3_1_OVRLD_OD: mul i32
// IN3_1_OVRLD_OD: mul i32
// mixed: scalar and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int -DELEM_TY2=int2x4 -DRET_TY=int2x4 %s | FileCheck %s -check-prefix=IN1_MAT1_OVRLD
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// IN1_MAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4x3 -DELEM_TY2=int -DRET_TY=int4x3 %s | FileCheck %s -check-prefix=IN1_MAT2_OVRLD
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// IN1_MAT2_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int -DELEM_TY2=int2x4 -DRET_TY=int2x4 %s | FileCheck %s -check-prefix=IN1_MAT1_OVRLD_OD
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// IN1_MAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4x3 -DELEM_TY2=int -DRET_TY=int4x3 %s | FileCheck %s -check-prefix=IN1_MAT2_OVRLD_OD
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// IN1_MAT2_OVRLD_OD: mul i32
// mixed: vector and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int2 -DELEM_TY2=int2x4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN2_MAT1_OVRLD
// IN2_MAT1_OVRLD: mul i32
// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD: mul i32
// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD: mul i32
// IN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=int4x3 -DELEM_TY2=int3 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN3_MAT2_OVRLD
// IN3_MAT2_OVRLD: mul i32
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: mul i32
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: mul i32
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: mul i32
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int2 -DELEM_TY2=int2x4 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN2_MAT1_OVRLD_OD
// IN2_MAT1_OVRLD_OD: mul i32
// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD_OD: mul i32
// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD_OD: mul i32
// IN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN2_MAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=int4x3 -DELEM_TY2=int3 -DRET_TY=int4 %s | FileCheck %s -check-prefix=IN3_MAT2_OVRLD_OD
// IN3_MAT2_OVRLD_OD: mul i32
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: mul i32
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: mul i32
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: mul i32
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// IN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// *****************************
// uint overloads
// *****************************
// vectors and scalars
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4 -DELEM_TY2=uint4 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN4_OVRLD
// UIN4_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint3 -DELEM_TY2=uint3 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN3_OVRLD
// UIN3_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2 -DELEM_TY2=uint2 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN2_OVRLD
// UIN2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN_OVRLD
// UIN_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4 -DELEM_TY2=uint4 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN4_OVRLD_OD
// UIN4_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint3 -DELEM_TY2=uint3 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN3_OVRLD_OD
// UIN3_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2 -DELEM_TY2=uint2 -DRET_TY=uint %s | FileCheck %s -check-prefix=UIN2_OVRLD_OD
// UIN2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2x4 -DELEM_TY2=uint4x3 -DRET_TY=uint2x3 %s | FileCheck %s -check-prefix=UINMAT1_OVRLD
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: mul i32
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: mul i32
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: mul i32
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint1x4 -DELEM_TY2=uint4x1 -DRET_TY=uint1x1 %s | FileCheck %s -check-prefix=UINMAT2_OVRLD
// UINMAT2_OVRLD: mul i32
// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 48
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2x4 -DELEM_TY2=uint4x3 -DRET_TY=uint2x3 %s | FileCheck %s -check-prefix=UINMAT1_OVRLD_OD
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: mul i32
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: mul i32
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: mul i32
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint1x4 -DELEM_TY2=uint4x1 -DRET_TY=uint1x1 %s | FileCheck %s -check-prefix=UINMAT2_OVRLD_OD
// UINMAT2_OVRLD_OD: mul i32
// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// UINMAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 48
// mixed: scalar and vector
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN1_4_OVRLD
// UIN1_4_OVRLD: mul i32
// UIN1_4_OVRLD: mul i32
// UIN1_4_OVRLD: mul i32
// UIN1_4_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint3 -DELEM_TY2=uint -DRET_TY=uint3 %s | FileCheck %s -check-prefix=UIN3_1_OVRLD
// UIN3_1_OVRLD: mul i32
// UIN3_1_OVRLD: mul i32
// UIN3_1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint -DELEM_TY2=uint4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN1_4_OVRLD_OD
// UIN1_4_OVRLD_OD: mul i32
// UIN1_4_OVRLD_OD: mul i32
// UIN1_4_OVRLD_OD: mul i32
// UIN1_4_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint3 -DELEM_TY2=uint -DRET_TY=uint3 %s | FileCheck %s -check-prefix=UIN3_1_OVRLD_OD
// UIN3_1_OVRLD_OD: mul i32
// UIN3_1_OVRLD_OD: mul i32
// UIN3_1_OVRLD_OD: mul i32
// mixed: scalar and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint -DELEM_TY2=uint2x4 -DRET_TY=uint2x4 %s | FileCheck %s -check-prefix=UIN1_MAT1_OVRLD
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// UIN1_MAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4x3 -DELEM_TY2=uint -DRET_TY=uint4x3 %s | FileCheck %s -check-prefix=UIN1_MAT2_OVRLD
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// UIN1_MAT2_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint -DELEM_TY2=uint2x4 -DRET_TY=uint2x4 %s | FileCheck %s -check-prefix=UIN1_MAT1_OVRLD_OD
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// UIN1_MAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4x3 -DELEM_TY2=uint -DRET_TY=uint4x3 %s | FileCheck %s -check-prefix=UIN1_MAT2_OVRLD_OD
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// UIN1_MAT2_OVRLD_OD: mul i32
// mixed: vector and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint2 -DELEM_TY2=uint2x4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN2_MAT1_OVRLD
// UIN2_MAT1_OVRLD: mul i32
// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD: mul i32
// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD: mul i32
// UIN2_MAT1_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD: mul i32
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=uint4x3 -DELEM_TY2=uint3 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN3_MAT2_OVRLD
// UIN3_MAT2_OVRLD: mul i32
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: mul i32
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: mul i32
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: mul i32
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD: call i32 @dx.op.tertiary.i32(i32 49
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint2 -DELEM_TY2=uint2x4 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN2_MAT1_OVRLD_OD
// UIN2_MAT1_OVRLD_OD: mul i32
// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD_OD: mul i32
// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD_OD: mul i32
// UIN2_MAT1_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN2_MAT1_OVRLD_OD: mul i32
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=uint4x3 -DELEM_TY2=uint3 -DRET_TY=uint4 %s | FileCheck %s -check-prefix=UIN3_MAT2_OVRLD_OD
// UIN3_MAT2_OVRLD_OD: mul i32
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: mul i32
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: mul i32
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: mul i32
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// UIN3_MAT2_OVRLD_OD: call i32 @dx.op.tertiary.i32(i32 49
// *****************************
// min16float overloads
// *****************************
// vectors and scalars
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4 -DELEM_TY2=min16float4 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL4_OVRLD
// MNFL4_OVRLD: call half @dx.op.dot4.f16(i32 56
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float3 -DELEM_TY2=min16float3 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL3_OVRLD
// MNFL3_OVRLD: call half @dx.op.dot3.f16
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2 -DELEM_TY2=min16float2 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL2_OVRLD
// MNFL2_OVRLD: call half @dx.op.dot2.f16
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL_OVRLD
// MNFL_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4 -DELEM_TY2=min16float4 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL4_OVRLD_OD
// MNFL4_OVRLD_OD: call half @dx.op.dot4.f16(i32 56
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float3 -DELEM_TY2=min16float3 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL3_OVRLD_OD
// MNFL3_OVRLD_OD: call half @dx.op.dot3.f16
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2 -DELEM_TY2=min16float2 -DRET_TY=min16float %s | FileCheck %s -check-prefix=MNFL2_OVRLD_OD
// MNFL2_OVRLD_OD: call half @dx.op.dot2.f16
// matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2x4 -DELEM_TY2=min16float4x3 -DRET_TY=min16float2x3 %s | FileCheck %s -check-prefix=MNFLMAT1_OVRLD
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: fmul fast half
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: fmul fast half
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: fmul fast half
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float1x4 -DELEM_TY2=min16float4x1 -DRET_TY=min16float1x1 %s | FileCheck %s -check-prefix=MNFLMAT2_OVRLD
// MNFLMAT2_OVRLD: fmul fast half
// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2x4 -DELEM_TY2=min16float4x3 -DRET_TY=min16float2x3 %s | FileCheck %s -check-prefix=MNFLMAT1_OVRLD_OD
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: fmul fast half
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: fmul fast half
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: fmul fast half
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT1_OVRLD_OD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float1x4 -DELEM_TY2=min16float4x1 -DRET_TY=min16float1x1 %s | FileCheck %s -check-prefix=MNFLMAT2_OVRLD_OD
// MNFLMAT2_OVRLD_OD: fmul fast half
// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFLMAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// mixed: scalar and vector
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL1_4_OVRLD
// MNFL1_4_OVRLD: fmul fast half
// MNFL1_4_OVRLD: fmul fast half
// MNFL1_4_OVRLD: fmul fast half
// MNFL1_4_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float3 -DELEM_TY2=min16float -DRET_TY=min16float3 %s | FileCheck %s -check-prefix=MNFL3_1_OVRLD
// MNFL3_1_OVRLD: fmul fast half
// MNFL3_1_OVRLD: fmul fast half
// MNFL3_1_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float -DELEM_TY2=min16float4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL1_4_OVRLD_OD
// MNFL1_4_OVRLD_OD: fmul fast half
// MNFL1_4_OVRLD_OD: fmul fast half
// MNFL1_4_OVRLD_OD: fmul fast half
// MNFL1_4_OVRLD_OD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float3 -DELEM_TY2=min16float -DRET_TY=min16float3 %s | FileCheck %s -check-prefix=MNFL3_1_OVRLD_OD
// MNFL3_1_OVRLD_OD: fmul fast half
// MNFL3_1_OVRLD_OD: fmul fast half
// MNFL3_1_OVRLD_OD: fmul fast half
// mixed: scalar and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float -DELEM_TY2=min16float2x4 -DRET_TY=min16float2x4 %s | FileCheck %s -check-prefix=MNFL1_MAT1_OVRLD
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// MNFL1_MAT1_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float -DRET_TY=min16float4x3 %s | FileCheck %s -check-prefix=MNFL1_MAT2_OVRLD
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// MNFL1_MAT2_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float -DELEM_TY2=min16float2x4 -DRET_TY=min16float2x4 %s | FileCheck %s -check-prefix=MNFL1_MAT1_OVRLD_OD
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// MNFL1_MAT1_OVRLD_OD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float -DRET_TY=min16float4x3 %s | FileCheck %s -check-prefix=MNFL1_MAT2_OVRLD_OD
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: extractvalue %dx.types.CBufRet.f16
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// MNFL1_MAT2_OVRLD_OD: fmul fast half
// mixed: vector and matrix
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float2 -DELEM_TY2=min16float2x4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL2_MAT1_OVRLD
// MNFL2_MAT1_OVRLD: fmul fast half
// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD: fmul fast half
// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD: fmul fast half
// MNFL2_MAT1_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float3 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL3_MAT2_OVRLD
// MNFL3_MAT2_OVRLD: fmul fast half
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: fmul fast half
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: fmul fast half
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: fmul fast half
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD: call half @dx.op.tertiary.f16(i32 46
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float2 -DELEM_TY2=min16float2x4 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL2_MAT1_OVRLD_OD
// MNFL2_MAT1_OVRLD_OD: fmul fast half
// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD_OD: fmul fast half
// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD_OD: fmul fast half
// MNFL2_MAT1_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL2_MAT1_OVRLD_OD: fmul fast half
// RUN: %dxc -T vs_6_0 -E main -Od -DELEM_TY1=min16float4x3 -DELEM_TY2=min16float3 -DRET_TY=min16float4 %s | FileCheck %s -check-prefix=MNFL3_MAT2_OVRLD_OD
// MNFL3_MAT2_OVRLD_OD: fmul fast half
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: fmul fast half
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: fmul fast half
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: fmul fast half
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// MNFL3_MAT2_OVRLD_OD: call half @dx.op.tertiary.f16(i32 46
// Constant buffer holding the two operands passed to mul() in main().
// ELEM_TY1 and ELEM_TY2 are not defined in this file; the RUN lines supply
// them on the dxc command line via -DELEM_TY1=... and -DELEM_TY2=....
cbuffer CB {
ELEM_TY1 e1; // first argument of mul
ELEM_TY2 e2; // second argument of mul
};
// Entry point selected by "-E main" on the RUN lines. Returns mul(e1, e2),
// tagged with the OUT semantic. RET_TY is supplied per RUN line via
// -DRET_TY=... and must name the result type of the chosen mul overload.
RET_TY main(): OUT
{
return mul(e1, e2);
}

Просмотреть файл

@ -0,0 +1,167 @@
// This file contains tests covering all overloads of mul intrinsic
// as documented here: https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-mul
// RUN: %dxc -T vs_6_0 -E main %s | FileCheck %s
// Output record for the constant-operand mul tests: main() stores the result
// of each mul() call into one of these fields. Members are declared in the
// same order as before; same-typed neighbours are grouped per declaration.
struct DS {
  // float results: scalars, 2/3/4-component vectors, then matrices
  float f_1, f_2, f_3, f_4;
  float2 f2_1, f2_2;
  float3 f3_1, f3_2;
  float4 f4_1, f4_2;
  float2x3 fm_1;
  float3x4 fm_2;
  float2x4 fm_3;

  // int results, same shape as the float group
  int i_1, i_2, i_3, i_4;
  int2 i2_1, i2_2;
  int3 i3_1, i3_2;
  int4 i4_1, i4_2;
  int2x3 im_1;
  int3x4 im_2;
  int2x4 im_3;

  // uint results, same shape as the float group
  // NOTE(review): no store to these fields is visible in main() as shown here.
  uint u_1, u_2, u_3, u_4;
  uint2 u2_1, u2_2;
  uint3 u3_1, u3_2;
  uint4 u4_1, u4_2;
  uint2x3 um_1;
  uint3x4 um_2;
  uint2x4 um_3;
};
// Read-write structured buffer of DS records; main() writes every mul()
// result into a field of element 0.
RWStructuredBuffer<DS> SB;
// Entry point. Every mul() call below uses literal operands and stores its
// result into a field of SB[0]; the FileCheck directive(s) placed immediately
// before each store list the values that result is expected to contain.
// Matrix results are listed column by column: for example mul(2, m1) is
// {2, 4, 6; 8, 10, 12} by rows and is checked as 2, 8, 4, 10, 6, 12.
// NOTE(review): presumably the compiler folds these calls to constants, which
// is why literal values are expected in the output - confirm.
// NOTE(review): the uint fields of DS are not written in this function as shown.
void main()
{
//***************
// float overloads
//***************
// scalar-scalar
// CHECK: float 1.000000e+01
SB[0].f_1 = mul(2, 5);
// vector-vector
// (mul of two vectors yields their dot product, e.g. 1*3 + 2*4 = 11)
// CHECK: float 1.100000e+01
SB[0].f_2 = mul(float2(1, 2), float2(3, 4));
// CHECK: float 2.300000e+01
SB[0].f_3 = mul(float3(1, 2, 3), float3(3, 4, 4));
// CHECK: float 3.200000e+01
SB[0].f_4 = mul(float4(1, 2, 2, 1), float4(3, 4, 8, 5));
// scalar-vector
// (each component is scaled by the scalar, on either side)
// CHECK: float 2.000000e+00, float 6.000000e+00
SB[0].f2_1 = mul(2, float2(1, 3));
// CHECK: float 3.000000e+00, float 9.000000e+00, float 0.000000e+00
SB[0].f3_1 = mul(float3(1, 3, 0), 3);
// CHECK: float 4.000000e+01, float 3.000000e+01, float 1.000000e+01, float 3.000000e+01
SB[0].f4_1 = mul(10, float4(4, 3, 1, 3));
// Matrix operands; the initializer lists fill the matrices row by row
// (m1 has rows {1,2,3} and {4,5,6}).
float2x3 m1 = {1, 2, 3, 4, 5, 6};
float3x4 m2 = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
float2x4 m3 = {2, 3, 4, 5, 6, 7, 8, 9};
// scalar-matrix
// CHECK: float 2.000000e+00, float 8.000000e+00, float 4.000000e+00, float 1.000000e+01
// CHECK: float 6.000000e+00, float 1.200000e+01
SB[0].fm_1 = mul(2, m1);
// CHECK: float 1.330000e+02, float 2.090000e+02, float 2.850000e+02, float 1.520000e+02
// CHECK: float 2.280000e+02, float 3.040000e+02, float 1.710000e+02, float 2.470000e+02
// CHECK: float 3.230000e+02, float 1.900000e+02, float 2.660000e+02, float 3.420000e+02
SB[0].fm_2 = mul(m2, 19);
// matrix-matrix
// (2x3 times 3x4 gives a 2x4 result)
// CHECK: float 7.400000e+01, float 1.730000e+02, float 8.000000e+01, float 1.880000e+02
// CHECK: float 8.600000e+01, float 2.030000e+02, float 9.200000e+01, float 2.180000e+02
SB[0].fm_3 = mul(m1, m2);
// vector-matrix
// (matrix first: one dot product per matrix row; vector first: one per column)
// CHECK: float 1.400000e+01, float 3.200000e+01
SB[0].f2_2 = mul(m1, float3(1, 2, 3));
// CHECK: float 9.000000e+00, float 1.200000e+01, float 1.500000e+01
SB[0].f3_2 = mul(float2(1, 2), m1);
// CHECK: float 1.400000e+01, float 1.700000e+01, float 2.000000e+01, float 2.300000e+01
SB[0].f4_2 = mul(float2(1, 2), m3);
//***************
// int overloads
//***************
// Same sequence of cases as the float section, with some negative operands.
// scalar-scalar
// CHECK: i32 -10
SB[0].i_1 = mul(2, -5);
// vector-vector
// CHECK: i32 -5
SB[0].i_2 = mul(int2(1, -2), int2(3, 4));
// CHECK: i32 7
SB[0].i_3 = mul(int3(1, 2, 3), int3(3, -4, 4));
// CHECK: i32 -16
SB[0].i_4 = mul(int4(1, -2, 2, 1), int4(3, 4, -8, 5));
// scalar-vector
// CHECK: i32 2, i32 -6
SB[0].i2_1 = mul(2, int2(1, -3));
// CHECK: i32 3, i32 -9, i32 0
SB[0].i3_1 = mul(int3(1, -3, 0), 3);
// CHECK: i32 40, i32 30, i32 10, i32 30
SB[0].i4_1 = mul(10, int4(4, 3, 1, 3));
// Integer matrix operands, filled row by row like their float counterparts.
int2x3 im1 = {1, 2, -3, 4, -5, 6};
int3x4 im2 = {7, 8, 9, 10, -11, 12, 13, 14, -15, 16, -17, 18};
int2x4 im3 = {2, 3, 4, -5, 6, 7, -8, -9};
// scalar-matrix
// CHECK: i32 -2, i32 -8, i32 -4, i32 10
// CHECK: i32 6, i32 -12
SB[0].im_1 = mul(-2, im1);
// CHECK: i32 133, i32 -209, i32 -285, i32 152
// CHECK: i32 228, i32 304, i32 171, i32 247
// CHECK: i32 -323, i32 190, i32 266, i32 342
SB[0].im_2 = mul(im2, 19);
// matrix-matrix
// CHECK: i32 30, i32 -7, i32 -16, i32 68
// CHECK: i32 86, i32 -131, i32 -16, i32 78
SB[0].im_3 = mul(im1, im2);
// vector-matrix
// CHECK: i32 -12, i32 32
SB[0].i2_2 = mul(im1, int3(1, -2, 3));
// CHECK: i32 9, i32 -8, i32 9
SB[0].i3_2 = mul(int2(1, 2), im1);
// CHECK: i32 10, i32 11, i32 -20, i32 -13
SB[0].i4_2 = mul(int2(-1, 2), im3);
}

Просмотреть файл

@ -722,6 +722,9 @@ public:
// Iterate over all RUN lines
for (auto &cmd : cmds) {
RunFileCheckFromCommands(cmd.c_str(), fileName);
// If any RUN command fails, skip executing the remaining commands
// and report the error
if (this->RunResult != 0) break;
}
}