[spirv] Translation of several intrinsic functions (#639)

* [spirv] Translation of several intrinsic functions

Translation for the following intrinsic functions:

* Matrix transpose
* smoothstep
* refract
* distance
* isinf
* isnan
* isfinite
* sincos
* saturate
* atan2
* fma
* fmod
* frac
* firstbithigh
* firstbitlow

* Address code review comments.
This commit is contained in:
Ehsan 2017-09-26 12:18:23 -04:00 коммит произвёл David Peixotto
Родитель 58b7f88864
Коммит a62fc13575
19 изменённых файлов: 649 добавлений и 10 удалений

Просмотреть файл

@ -1061,6 +1061,13 @@ The following intrinsic HLSL functions are currently supported:
- ``asuint``: converts the component type of a scalar/vector/matrix from float
or int into uint. Uses ``OpBitcast``. This method currently does not support
conversion into unsigned integer matrices.
- ``transpose`` : Transposes the specified matrix. Uses SPIR-V ``OpTranspose``.
- ``isnan`` : Determines if the specified value is NaN. Uses SPIR-V ``OpIsNan``.
- ``isinf`` : Determines if the specified value is infinite. Uses SPIR-V ``OpIsInf``.
- ``isfinite`` : Determines if the specified value is finite. Since ``OpIsFinite``
requires the ``Kernel`` capability, translation is done using ``OpIsNan`` and ``OpIsInf``.
A given value is finite iff it is not NaN and not infinite.
- ``fmod`` : Returns the floating-point remainder for division of its arguments. Uses SPIR-V ``OpFMod``.
Using GLSL extended instructions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -1075,17 +1082,23 @@ HLSL Intrinsic Function GLSL Extended Instruction
``acos`` ``Acos``
``asin`` ``Asin``
``atan`` ``Atan``
``atan2`` ``Atan2``
``ceil`` ``Ceil``
``clamp`` ``SClamp``/``UClamp``/``FClamp``
``cos`` ``Cos``
``cosh`` ``Cosh``
``cross`` ``Cross``
``cross`` ``Cross``
``degrees`` ``Degrees``
``distance`` ``Distance``
``radians`` ``Radians``
``determinant`` ``Determinant``
``exp`` ``Exp``
``exp2`` ``Exp2``
``firstbithigh`` ``FindSMsb`` / ``FindUMsb``
``firstbitlow`` ``FindILsb``
``floor`` ``Floor``
``fma`` ``Fma``
``frac`` ``Fract``
``length`` ``Length``
``log`` ``Log``
``log2`` ``Log2``
@ -1094,15 +1107,19 @@ HLSL Intrinsic Function GLSL Extended Instruction
``normalize`` ``Normalize``
``pow`` ``Pow``
``reflect`` ``Reflect``
``refract`` ``Refract``
``round`` ``Round``
``rsqrt`` ``InverseSqrt``
``step`` ``Step``
``saturate`` ``FClamp``
``sign`` ``SSign``/``FSign``
``sin`` ``Sin``
``sincos`` ``Sin`` and ``Cos``
``sinh`` ``Sinh``
``smoothstep`` ``SmoothStep``
``sqrt`` ``Sqrt``
``step`` ``Step``
``tan`` ``Tan``
``tanh`` ``Tanh``
``sqrt`` ``Sqrt``
``trunc`` ``Trunc``
======================= ===============================

Просмотреть файл

@ -3293,6 +3293,12 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
GLSLstd450 glslOpcode = GLSLstd450Bad;
// Expands to a switch case for hlsl::IntrinsicOp::IOP_<intrinsicOp> that
// returns the result of processIntrinsicUsingSpirvInst() called with
// spv::Op::Op<spirvOp>. doEachVec is forwarded as that function's third
// argument (whether to apply the instruction per matrix row).
// The trailing 'break' comes after a return; it lets every use of the macro
// be written with a terminating semicolon.
#define INTRINSIC_SPIRV_OP_CASE(intrinsicOp, spirvOp, doEachVec) \
case hlsl::IntrinsicOp::IOP_##intrinsicOp: { \
return processIntrinsicUsingSpirvInst(callExpr, spv::Op::Op##spirvOp, \
doEachVec); \
} break
#define INTRINSIC_OP_CASE(intrinsicOp, glslOp, doEachVec) \
case hlsl::IntrinsicOp::IOP_##intrinsicOp: { \
glslOpcode = GLSLstd450::GLSLstd450##glslOp; \
@ -3307,6 +3313,14 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
return processIntrinsicUsingGLSLInst(callExpr, glslOpcode, doEachVec); \
} break
// Expands to a switch case for hlsl::IntrinsicOp::IOP_<intrinsicOp> that
// picks between two GLSL.std.450 opcodes: GLSLstd450<glslSintOp> when
// isSintType is true, GLSLstd450<glslUintOp> otherwise. The chosen opcode is
// stored in glslOpcode and handed to processIntrinsicUsingGLSLInst() together
// with doEachVec.
// NOTE(review): isSintType and glslOpcode must be visible at the expansion
// site; isSintType is presumably derived from the call's type -- confirm
// against the enclosing function.
#define INTRINSIC_OP_CASE_SINT_UINT(intrinsicOp, glslSintOp, glslUintOp, \
doEachVec) \
case hlsl::IntrinsicOp::IOP_##intrinsicOp: { \
glslOpcode = isSintType ? GLSLstd450::GLSLstd450##glslSintOp \
: GLSLstd450::GLSLstd450##glslUintOp; \
return processIntrinsicUsingGLSLInst(callExpr, glslOpcode, doEachVec); \
} break
#define INTRINSIC_OP_CASE_SINT_UINT_FLOAT(intrinsicOp, glslSintOp, glslUintOp, \
glslFloatOp, doEachVec) \
case hlsl::IntrinsicOp::IOP_##intrinsicOp: { \
@ -3341,21 +3355,40 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
GLSLstd450::GLSLstd450SSign,
/*actPerRowForMatrices*/ true);
}
case hlsl::IntrinsicOp::IOP_isfinite: {
return processIntrinsicIsFinite(callExpr);
}
case hlsl::IntrinsicOp::IOP_sincos: {
return processIntrinsicSinCos(callExpr);
}
case hlsl::IntrinsicOp::IOP_saturate: {
return processIntrinsicSaturate(callExpr);
}
INTRINSIC_SPIRV_OP_CASE(transpose, Transpose, false);
INTRINSIC_SPIRV_OP_CASE(isinf, IsInf, true);
INTRINSIC_SPIRV_OP_CASE(isnan, IsNan, true);
INTRINSIC_SPIRV_OP_CASE(fmod, FMod, true);
INTRINSIC_OP_CASE(round, Round, true);
INTRINSIC_OP_CASE_INT_FLOAT(abs, SAbs, FAbs, true);
INTRINSIC_OP_CASE(acos, Acos, true);
INTRINSIC_OP_CASE(asin, Asin, true);
INTRINSIC_OP_CASE(atan, Atan, true);
INTRINSIC_OP_CASE(atan2, Atan2, true);
INTRINSIC_OP_CASE(ceil, Ceil, true);
INTRINSIC_OP_CASE(cos, Cos, true);
INTRINSIC_OP_CASE(cosh, Cosh, true);
INTRINSIC_OP_CASE(cross, Cross, false);
INTRINSIC_OP_CASE(degrees, Degrees, true);
INTRINSIC_OP_CASE(radians, Radians, true);
INTRINSIC_OP_CASE(distance, Distance, false);
INTRINSIC_OP_CASE(determinant, Determinant, false);
INTRINSIC_OP_CASE(exp, Exp, true);
INTRINSIC_OP_CASE(exp2, Exp2, true);
INTRINSIC_OP_CASE_SINT_UINT(firstbithigh, FindSMsb, FindUMsb, false);
INTRINSIC_OP_CASE_SINT_UINT(ufirstbithigh, FindSMsb, FindUMsb, false);
INTRINSIC_OP_CASE(firstbitlow, FindILsb, false);
INTRINSIC_OP_CASE(floor, Floor, true);
INTRINSIC_OP_CASE(fma, Fma, true);
INTRINSIC_OP_CASE(frac, Fract, true);
INTRINSIC_OP_CASE(length, Length, false);
INTRINSIC_OP_CASE(log, Log, true);
INTRINSIC_OP_CASE(log2, Log2, true);
@ -3365,8 +3398,11 @@ uint32_t SPIRVEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
INTRINSIC_OP_CASE(umin, UMin, true);
INTRINSIC_OP_CASE(normalize, Normalize, false);
INTRINSIC_OP_CASE(pow, Pow, true);
INTRINSIC_OP_CASE(radians, Radians, true);
INTRINSIC_OP_CASE(reflect, Reflect, false);
INTRINSIC_OP_CASE(refract, Refract, false);
INTRINSIC_OP_CASE(rsqrt, InverseSqrt, true);
INTRINSIC_OP_CASE(smoothstep, SmoothStep, true);
INTRINSIC_OP_CASE(step, Step, true);
INTRINSIC_OP_CASE(sin, Sin, true);
INTRINSIC_OP_CASE(sinh, Sinh, true);
@ -3733,6 +3769,88 @@ uint32_t SPIRVEmitter::processIntrinsicAsType(const CallExpr *callExpr) {
doExpr(arg));
}
uint32_t SPIRVEmitter::processIntrinsicIsFinite(const CallExpr *callExpr) {
  // OpIsFinite is not used because it requires the Kernel capability.
  // A value is finite exactly when it is neither NaN nor infinite, so we emit
  //   isfinite(x) = OpLogicalNot(OpLogicalOr(OpIsNan(x), OpIsInf(x)))
  // Every instruction is typed with the translated return type of the call.
  const uint32_t valueId = doExpr(callExpr->getArg(0));
  const uint32_t resultTypeId =
      typeTranslator.translateType(callExpr->getType());

  const uint32_t nanId =
      theBuilder.createUnaryOp(spv::Op::OpIsNan, resultTypeId, valueId);
  const uint32_t infId =
      theBuilder.createUnaryOp(spv::Op::OpIsInf, resultTypeId, valueId);
  const uint32_t notFiniteId = theBuilder.createBinaryOp(
      spv::Op::OpLogicalOr, resultTypeId, nanId, infId);

  // Returns the <result-id> of the negation.
  return theBuilder.createUnaryOp(spv::Op::OpLogicalNot, resultTypeId,
                                  notFiniteId);
}
uint32_t SPIRVEmitter::processIntrinsicSinCos(const CallExpr *callExpr) {
// Since there is no sincos equivalent in SPIR-V, we need to perform Sin
// once and Cos once. We can reuse existing Sine/Cosine handling functions.
CallExpr *sincosExpr =
new (astContext) CallExpr(astContext, Stmt::StmtClass::NoStmtClass, {});
sincosExpr->setType(callExpr->getArg(0)->getType());
sincosExpr->setNumArgs(astContext, 1);
sincosExpr->setArg(0, const_cast<Expr *>(callExpr->getArg(0)));
// Perform Sin and store results in argument 1.
const uint32_t sin =
processIntrinsicUsingGLSLInst(sincosExpr, GLSLstd450::GLSLstd450Sin,
/*actPerRowForMatrices*/ true);
theBuilder.createStore(doExpr(callExpr->getArg(1)), sin);
// Perform Cos and store results in argument 2.
const uint32_t cos =
processIntrinsicUsingGLSLInst(sincosExpr, GLSLstd450::GLSLstd450Cos,
/*actPerRowForMatrices*/ true);
theBuilder.createStore(doExpr(callExpr->getArg(2)), cos);
return 0;
}
uint32_t SPIRVEmitter::processIntrinsicSaturate(const CallExpr *callExpr) {
const auto *arg = callExpr->getArg(0);
const auto argId = doExpr(arg);
const auto argType = arg->getType();
const uint32_t returnType = typeTranslator.translateType(callExpr->getType());
const uint32_t glslInstSetId = theBuilder.getGLSLExtInstSet();
if (argType->isFloatingType()) {
const uint32_t floatZero = getValueZero(argType);
const uint32_t floatOne = getValueOne(argType);
return theBuilder.createExtInst(returnType, glslInstSetId,
GLSLstd450::GLSLstd450FClamp,
{argId, floatZero, floatOne});
}
QualType elemType = {};
uint32_t vecSize = 0;
if (TypeTranslator::isVectorType(argType, &elemType, &vecSize)) {
const uint32_t vecZero = getVecValueZero(elemType, vecSize);
const uint32_t vecOne = getVecValueOne(elemType, vecSize);
return theBuilder.createExtInst(returnType, glslInstSetId,
GLSLstd450::GLSLstd450FClamp,
{argId, vecZero, vecOne});
}
uint32_t numRows = 0, numCols = 0;
if (TypeTranslator::isMxNMatrix(argType, &elemType, &numRows, &numCols)) {
const uint32_t vecZero = getVecValueZero(elemType, numCols);
const uint32_t vecOne = getVecValueOne(elemType, numCols);
const auto actOnEachVec = [this, vecZero, vecOne, glslInstSetId](
uint32_t /*index*/, uint32_t vecType,
uint32_t curRowId) {
return theBuilder.createExtInst(vecType, glslInstSetId,
GLSLstd450::GLSLstd450FClamp,
{curRowId, vecZero, vecOne});
};
return processEachVectorInMatrix(arg, argId, actOnEachVec);
}
emitError("Invalid argument type passed to saturate().");
return 0;
}
uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
// Import the GLSL.std.450 extended instruction set.
const uint32_t glslInstSetId = theBuilder.getGLSLExtInstSet();
@ -3760,6 +3878,50 @@ uint32_t SPIRVEmitter::processIntrinsicFloatSign(const CallExpr *callExpr) {
return castToInt(floatSignResultId, arg->getType(), returnType);
}
uint32_t SPIRVEmitter::processIntrinsicUsingSpirvInst(
const CallExpr *callExpr, spv::Op opcode, bool actPerRowForMatrices) {
const uint32_t returnType = typeTranslator.translateType(callExpr->getType());
if (callExpr->getNumArgs() == 1u) {
const Expr *arg = callExpr->getArg(0);
const uint32_t argId = doExpr(arg);
// If the instruction does not operate on matrices, we can perform the
// instruction on each vector of the matrix.
if (actPerRowForMatrices &&
TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
const auto actOnEachVec = [this, opcode](uint32_t /*index*/,
uint32_t vecType,
uint32_t curRowId) {
return theBuilder.createUnaryOp(opcode, vecType, {curRowId});
};
return processEachVectorInMatrix(arg, argId, actOnEachVec);
}
return theBuilder.createUnaryOp(opcode, returnType, {argId});
} else if (callExpr->getNumArgs() == 2u) {
const Expr *arg0 = callExpr->getArg(0);
const uint32_t arg0Id = doExpr(arg0);
const uint32_t arg1Id = doExpr(callExpr->getArg(1));
// If the instruction does not operate on matrices, we can perform the
// instruction on each vector of the matrix.
if (actPerRowForMatrices &&
TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
const auto actOnEachVec = [this, opcode, arg1Id](uint32_t index,
uint32_t vecType,
uint32_t arg0RowId) {
const uint32_t arg1RowId =
theBuilder.createCompositeExtract(vecType, arg1Id, {index});
return theBuilder.createBinaryOp(opcode, vecType, arg0RowId, arg1RowId);
};
return processEachVectorInMatrix(arg0, arg0Id, actOnEachVec);
}
return theBuilder.createBinaryOp(opcode, returnType, arg0Id, arg1Id);
}
emitError("Unsupported intrinsic function %0.")
<< cast<DeclRefExpr>(callExpr->getCallee())->getNameInfo().getAsString();
return 0;
}
uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
const CallExpr *callExpr, GLSLstd450 opcode, bool actPerRowForMatrices) {
// Import the GLSL.std.450 extended instruction set.
@ -3773,8 +3935,9 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
// instruction on each vector of the matrix.
if (actPerRowForMatrices &&
TypeTranslator::isSpirvAcceptableMatrixType(arg->getType())) {
const auto actOnEachVec = [this, glslInstSetId, opcode](
uint32_t /*index*/, uint32_t vecType, uint32_t curRowId) {
const auto actOnEachVec = [this, glslInstSetId,
opcode](uint32_t /*index*/, uint32_t vecType,
uint32_t curRowId) {
return theBuilder.createExtInst(vecType, glslInstSetId, opcode,
{curRowId});
};
@ -3783,15 +3946,15 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
return theBuilder.createExtInst(returnType, glslInstSetId, opcode, {argId});
} else if (callExpr->getNumArgs() == 2u) {
const Expr *arg0 = callExpr->getArg(0);
const Expr *arg1 = callExpr->getArg(1);
const uint32_t arg0Id = doExpr(arg0);
const uint32_t arg1Id = doExpr(arg1);
const uint32_t arg1Id = doExpr(callExpr->getArg(1));
// If the instruction does not operate on matrices, we can perform the
// instruction on each vector of the matrix.
if (actPerRowForMatrices &&
TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
const auto actOnEachVec = [this, glslInstSetId, opcode, arg1Id](
uint32_t index, uint32_t vecType, uint32_t arg0RowId) {
const auto actOnEachVec = [this, glslInstSetId, opcode,
arg1Id](uint32_t index, uint32_t vecType,
uint32_t arg0RowId) {
const uint32_t arg1RowId =
theBuilder.createCompositeExtract(vecType, arg1Id, {index});
return theBuilder.createExtInst(vecType, glslInstSetId, opcode,
@ -3801,6 +3964,29 @@ uint32_t SPIRVEmitter::processIntrinsicUsingGLSLInst(
}
return theBuilder.createExtInst(returnType, glslInstSetId, opcode,
{arg0Id, arg1Id});
} else if (callExpr->getNumArgs() == 3u) {
const Expr *arg0 = callExpr->getArg(0);
const uint32_t arg0Id = doExpr(arg0);
const uint32_t arg1Id = doExpr(callExpr->getArg(1));
const uint32_t arg2Id = doExpr(callExpr->getArg(2));
// If the instruction does not operate on matrices, we can perform the
// instruction on each vector of the matrix.
if (actPerRowForMatrices &&
TypeTranslator::isSpirvAcceptableMatrixType(arg0->getType())) {
const auto actOnEachVec = [this, glslInstSetId, opcode, arg0Id, arg1Id,
arg2Id](uint32_t index, uint32_t vecType,
uint32_t arg0RowId) {
const uint32_t arg1RowId =
theBuilder.createCompositeExtract(vecType, arg1Id, {index});
const uint32_t arg2RowId =
theBuilder.createCompositeExtract(vecType, arg2Id, {index});
return theBuilder.createExtInst(vecType, glslInstSetId, opcode,
{arg0RowId, arg1RowId, arg2RowId});
};
return processEachVectorInMatrix(arg0, arg0Id, actOnEachVec);
}
return theBuilder.createExtInst(returnType, glslInstSetId, opcode,
{arg0Id, arg1Id, arg2Id});
}
emitError("Unsupported intrinsic function %0.")

Просмотреть файл

@ -273,6 +273,15 @@ private:
/// Processes the 'asfloat', 'asint', and 'asuint' intrinsic functions.
uint32_t processIntrinsicAsType(const CallExpr *);
/// Processes the 'saturate' intrinsic function. The argument is clamped to
/// [0, 1] using the GLSL extended instruction FClamp; matrices are handled
/// one row at a time.
uint32_t processIntrinsicSaturate(const CallExpr *);
/// Processes the 'sincos' intrinsic function. The sine of argument 0 is
/// stored into argument 1 and its cosine into argument 2. Always returns 0.
uint32_t processIntrinsicSinCos(const CallExpr *);
/// Processes the 'isfinite' intrinsic function. The result is computed as
/// !(OpIsNan || OpIsInf) because OpIsFinite requires the Kernel capability.
uint32_t processIntrinsicIsFinite(const CallExpr *);
/// Processes the 'sign' intrinsic function for float types.
/// The FSign instruction in the GLSL instruction set returns a floating point
/// result. The HLSL sign function, however, returns an integer. An extra
@ -290,6 +299,13 @@ private:
uint32_t processIntrinsicMemberCall(const CXXMemberCallExpr *expr,
hlsl::IntrinsicOp opcode);
/// Processes the given intrinsic function call using the given SPIR-V
/// instruction. Only calls with one or two arguments are handled; any other
/// arity reports an error and returns 0.
///
/// Pass true for actPerRowForMatrices when the instruction cannot operate on
/// matrices: if the first argument is then a matrix, the instruction is
/// performed on each row of the matrix and composite construction is used to
/// generate the resulting matrix. Pass false for instructions that take whole
/// matrices (e.g. OpTranspose).
uint32_t processIntrinsicUsingSpirvInst(const CallExpr *, spv::Op,
                                        bool actPerRowForMatrices);
private:
/// Returns the <result-id> for constant value 0 of the given type.
uint32_t getValueZero(QualType type);

Просмотреть файл

@ -0,0 +1,36 @@
// Run: %dxc -T vs_6_0 -E main
// According to HLSL reference:
// The 'atan2' function can only operate on float, vector of float, and matrix of floats.
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float a1, a2, atan2a;
float4 b1, b2, atan2b;
float2x3 c1, c2, atan2c;
// CHECK: [[a1:%\d+]] = OpLoad %float %a1
// CHECK-NEXT: [[a2:%\d+]] = OpLoad %float %a2
// CHECK-NEXT: [[atan2a:%\d+]] = OpExtInst %float [[glsl]] Atan2 [[a1]] [[a2]]
// CHECK-NEXT: OpStore %atan2a [[atan2a]]
atan2a = atan2(a1, a2);
// CHECK: [[b1:%\d+]] = OpLoad %v4float %b1
// CHECK-NEXT: [[b2:%\d+]] = OpLoad %v4float %b2
// CHECK-NEXT: [[atan2b:%\d+]] = OpExtInst %v4float [[glsl]] Atan2 [[b1]] [[b2]]
// CHECK-NEXT: OpStore %atan2b [[atan2b]]
atan2b = atan2(b1, b2);
// CHECK: [[c1:%\d+]] = OpLoad %mat2v3float %c1
// CHECK-NEXT: [[c2:%\d+]] = OpLoad %mat2v3float %c2
// CHECK-NEXT: [[c1_row0:%\d+]] = OpCompositeExtract %v3float [[c1]] 0
// CHECK-NEXT: [[c2_row0:%\d+]] = OpCompositeExtract %v3float [[c2]] 0
// CHECK-NEXT: [[atan2c_row0:%\d+]] = OpExtInst %v3float [[glsl]] Atan2 [[c1_row0]] [[c2_row0]]
// CHECK-NEXT: [[c1_row1:%\d+]] = OpCompositeExtract %v3float [[c1]] 1
// CHECK-NEXT: [[c2_row1:%\d+]] = OpCompositeExtract %v3float [[c2]] 1
// CHECK-NEXT: [[atan2c_row1:%\d+]] = OpExtInst %v3float [[glsl]] Atan2 [[c1_row1]] [[c2_row1]]
// CHECK-NEXT: [[atan2c:%\d+]] = OpCompositeConstruct %mat2v3float [[atan2c_row0]] [[atan2c_row1]]
// CHECK-NEXT: OpStore %atan2c [[atan2c]]
atan2c = atan2(c1, c2);
}

Просмотреть файл

@ -0,0 +1,12 @@
// Run: %dxc -T ps_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float3 a, b;
// CHECK: [[a:%\d+]] = OpLoad %v3float %a
// CHECK-NEXT: [[b:%\d+]] = OpLoad %v3float %b
// CHECK-NEXT: {{%\d+}} = OpExtInst %float [[glsl]] Distance [[a]] [[b]]
float d = distance(a, b);
}

Просмотреть файл

@ -0,0 +1,27 @@
// Run: %dxc -T ps_6_0 -E main
// Note: Even though the HLSL documentation contains a version of "firstbithigh" that
// takes signed integer(s) and returns signed integer(s), the frontend always generates
// the AST using the overloaded version that takes unsigned integer(s) and returns
// unsigned integer(s). Therefore "FindSMsb" is not generated in any case below.
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
int sint_1;
int4 sint_4;
uint uint_1;
uint4 uint_4;
// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindUMsb {{%\d+}}
int fbh = firstbithigh(sint_1);
// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindUMsb {{%\d+}}
int4 fbh4 = firstbithigh(sint_4);
// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindUMsb {{%\d+}}
uint ufbh = firstbithigh(uint_1);
// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindUMsb {{%\d+}}
uint4 ufbh4 = firstbithigh(uint_4);
}

Просмотреть файл

@ -0,0 +1,27 @@
// Run: %dxc -T ps_6_0 -E main
// Note: Even though the HLSL documentation contains a version of "firstbitlow" that
// takes signed integer(s) and returns signed integer(s), the frontend always generates
// the AST using the overloaded version that takes unsigned integer(s) and returns
// unsigned integer(s).
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
int sint_1;
int4 sint_4;
uint uint_1;
uint4 uint_4;
// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindILsb {{%\d+}}
int fbl = firstbitlow(sint_1);
// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindILsb {{%\d+}}
int4 fbl4 = firstbitlow(sint_4);
// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindILsb {{%\d+}}
uint ufbl = firstbitlow(uint_1);
// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindILsb {{%\d+}}
uint4 ufbl4 = firstbitlow(uint_4);
}

Просмотреть файл

@ -0,0 +1,42 @@
// Run: %dxc -T vs_6_0 -E main
// According to HLSL reference:
// The 'fma' function can only operate on double, vector of double, and matrix of double.
// CHECK: OpCapability Float64
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
// CHECK: %double = OpTypeFloat 64
// CHECK: %v3double = OpTypeVector %double 3
// CHECK: %mat2v3double = OpTypeMatrix %v3double 2
void main() {
double a1, a2, a3, fma_a;
double4 b1, b2, b3, fma_b;
double2x3 c1, c2, c3, fma_c;
// CHECK: [[a1:%\d+]] = OpLoad %double %a1
// CHECK-NEXT: [[a2:%\d+]] = OpLoad %double %a2
// CHECK-NEXT: [[a3:%\d+]] = OpLoad %double %a3
// CHECK-NEXT: {{%\d+}} = OpExtInst %double [[glsl]] Fma [[a1]] [[a2]] [[a3]]
fma_a = fma(a1, a2, a3);
// CHECK: [[b1:%\d+]] = OpLoad %v4double %b1
// CHECK-NEXT: [[b2:%\d+]] = OpLoad %v4double %b2
// CHECK-NEXT: [[b3:%\d+]] = OpLoad %v4double %b3
// CHECK-NEXT: {{%\d+}} = OpExtInst %v4double [[glsl]] Fma [[b1]] [[b2]] [[b3]]
fma_b = fma(b1, b2, b3);
// CHECK: [[c1:%\d+]] = OpLoad %mat2v3double %c1
// CHECK-NEXT: [[c2:%\d+]] = OpLoad %mat2v3double %c2
// CHECK-NEXT: [[c3:%\d+]] = OpLoad %mat2v3double %c3
// CHECK-NEXT: [[c1_row0:%\d+]] = OpCompositeExtract %v3double [[c1]] 0
// CHECK-NEXT: [[c2_row0:%\d+]] = OpCompositeExtract %v3double [[c2]] 0
// CHECK-NEXT: [[c3_row0:%\d+]] = OpCompositeExtract %v3double [[c3]] 0
// CHECK-NEXT: [[fma_row0:%\d+]] = OpExtInst %v3double [[glsl]] Fma [[c1_row0]] [[c2_row0]] [[c3_row0]]
// CHECK-NEXT: [[c1_row1:%\d+]] = OpCompositeExtract %v3double [[c1]] 1
// CHECK-NEXT: [[c2_row1:%\d+]] = OpCompositeExtract %v3double [[c2]] 1
// CHECK-NEXT: [[c3_row1:%\d+]] = OpCompositeExtract %v3double [[c3]] 1
// CHECK-NEXT: [[fma_row1:%\d+]] = OpExtInst %v3double [[glsl]] Fma [[c1_row1]] [[c2_row1]] [[c3_row1]]
// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3double [[fma_row0]] [[fma_row1]]
fma_c = fma(c1, c2, c3);
}

Просмотреть файл

@ -0,0 +1,28 @@
// Run: %dxc -T vs_6_0 -E main
void main() {
float a1, a2, fmod_a;
float4 b1, b2, fmod_b;
float2x3 c1, c2, fmod_c;
// CHECK: [[a1:%\d+]] = OpLoad %float %a1
// CHECK-NEXT: [[a2:%\d+]] = OpLoad %float %a2
// CHECK-NEXT: {{%\d+}} = OpFMod %float [[a1]] [[a2]]
fmod_a = fmod(a1, a2);
// CHECK: [[b1:%\d+]] = OpLoad %v4float %b1
// CHECK-NEXT: [[b2:%\d+]] = OpLoad %v4float %b2
// CHECK-NEXT: {{%\d+}} = OpFMod %v4float [[b1]] [[b2]]
fmod_b = fmod(b1, b2);
// CHECK: [[c1:%\d+]] = OpLoad %mat2v3float %c1
// CHECK-NEXT: [[c2:%\d+]] = OpLoad %mat2v3float %c2
// CHECK-NEXT: [[c1_row0:%\d+]] = OpCompositeExtract %v3float [[c1]] 0
// CHECK-NEXT: [[c2_row0:%\d+]] = OpCompositeExtract %v3float [[c2]] 0
// CHECK-NEXT: [[fmod_c_row0:%\d+]] = OpFMod %v3float [[c1_row0]] [[c2_row0]]
// CHECK-NEXT: [[c1_row1:%\d+]] = OpCompositeExtract %v3float [[c1]] 1
// CHECK-NEXT: [[c2_row1:%\d+]] = OpCompositeExtract %v3float [[c2]] 1
// CHECK-NEXT: [[fmod_c_row1:%\d+]] = OpFMod %v3float [[c1_row1]] [[c2_row1]]
// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3float [[fmod_c_row0]] [[fmod_c_row1]]
fmod_c = fmod(c1, c2);
}

Просмотреть файл

@ -0,0 +1,25 @@
// Run: %dxc -T vs_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float a, frac_a;
float4 b, frac_b;
float2x3 c, frac_c;
// CHECK: [[a:%\d+]] = OpLoad %float %a
// CHECK-NEXT: {{%\d+}} = OpExtInst %float [[glsl]] Fract [[a]]
frac_a = frac(a);
// CHECK: [[b:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: {{%\d+}} = OpExtInst %v4float [[glsl]] Fract [[b]]
frac_b = frac(b);
// CHECK: [[c:%\d+]] = OpLoad %mat2v3float %c
// CHECK-NEXT: [[c_row0:%\d+]] = OpCompositeExtract %v3float [[c]] 0
// CHECK-NEXT:[[frac_c_row0:%\d+]] = OpExtInst %v3float [[glsl]] Fract [[c_row0]]
// CHECK-NEXT: [[c_row1:%\d+]] = OpCompositeExtract %v3float [[c]] 1
// CHECK-NEXT:[[frac_c_row1:%\d+]] = OpExtInst %v3float [[glsl]] Fract [[c_row1]]
// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3float [[frac_c_row0]] [[frac_c_row1]]
frac_c = frac(c);
}

Просмотреть файл

@ -0,0 +1,27 @@
// Run: %dxc -T ps_6_0 -E main
// Since OpIsFinite needs the Kernel capability, translation is done using OpIsNan and OpIsInf.
// isFinite = !isNan && !isInf.
void main() {
float a;
float4 b;
float2x3 c;
// CHECK: [[a:%\d+]] = OpLoad %float %a
// CHECK-NEXT: [[a_isNan:%\d+]] = OpIsNan %bool [[a]]
// CHECK-NEXT: [[a_isInf:%\d+]] = OpIsInf %bool [[a]]
// CHECK-NEXT: [[a_NanOrInf:%\d+]] = OpLogicalOr %bool [[a_isNan]] [[a_isInf]]
// CHECK-NEXT: {{%\d+}} = OpLogicalNot %bool [[a_NanOrInf]]
bool isf_a = isfinite(a);
// CHECK: [[b:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: [[b_isNan:%\d+]] = OpIsNan %v4bool [[b]]
// CHECK-NEXT: [[b_isInf:%\d+]] = OpIsInf %v4bool [[b]]
// CHECK-NEXT: [[b_NanOrInf:%\d+]] = OpLogicalOr %v4bool [[b_isNan]] [[b_isInf]]
// CHECK-NEXT: {{%\d+}} = OpLogicalNot %v4bool [[b_NanOrInf]]
bool4 isf_b = isfinite(b);
// TODO: We can not translate the following since boolean matrices are currently not supported.
// bool2x3 isf_c = isfinite(c);
}

Просмотреть файл

@ -0,0 +1,18 @@
// Run: %dxc -T ps_6_0 -E main
void main() {
float a;
float4 b;
float2x3 c;
// CHECK: [[a:%\d+]] = OpLoad %float %a
// CHECK-NEXT: {{%\d+}} = OpIsInf %bool [[a]]
bool isinf_a = isinf(a);
// CHECK: [[b:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: {{%\d+}} = OpIsInf %v4bool [[b]]
bool4 isinf_b = isinf(b);
// TODO: We can not translate the following since boolean matrices are currently not supported.
// bool2x3 isinf_c = isinf(c);
}

Просмотреть файл

@ -0,0 +1,18 @@
// Run: %dxc -T ps_6_0 -E main
void main() {
float a;
float4 b;
float2x3 c;
// CHECK: [[a:%\d+]] = OpLoad %float %a
// CHECK-NEXT: {{%\d+}} = OpIsNan %bool [[a]]
bool isnan_a = isnan(a);
// CHECK: [[b:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: {{%\d+}} = OpIsNan %v4bool [[b]]
bool4 isnan_b = isnan(b);
// TODO: We can not translate the following since boolean matrices are currently not supported.
// bool2x3 isnan_c = isnan(c);
}

Просмотреть файл

@ -0,0 +1,14 @@
// Run: %dxc -T ps_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float4 i, n;
float eta;
// CHECK: [[i:%\d+]] = OpLoad %v4float %i
// CHECK-NEXT: [[n:%\d+]] = OpLoad %v4float %n
// CHECK-NEXT: [[eta:%\d+]] = OpLoad %float %eta
// CHECK-NEXT: {{%\d+}} = OpExtInst %v4float [[glsl]] Refract [[i]] [[n]] [[eta]]
float4 r = refract(i, n, eta);
}

Просмотреть файл

@ -0,0 +1,32 @@
// Run: %dxc -T vs_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
// CHECK: [[v4f0:%\d+]] = OpConstantComposite %v4float %float_0 %float_0 %float_0 %float_0
// CHECK: [[v4f1:%\d+]] = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
// CHECK: [[v3f0:%\d+]] = OpConstantComposite %v3float %float_0 %float_0 %float_0
// CHECK: [[v3f1:%\d+]] = OpConstantComposite %v3float %float_1 %float_1 %float_1
void main() {
float a, sata;
float4 b, satb;
float2x3 c, satc;
// CHECK: [[a:%\d+]] = OpLoad %float %a
// CHECK-NEXT: [[sata:%\d+]] = OpExtInst %float [[glsl]] FClamp [[a]] %float_0 %float_1
// CHECK-NEXT: OpStore %sata [[sata]]
sata = saturate(a);
// CHECK: [[b:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: [[satb:%\d+]] = OpExtInst %v4float [[glsl]] FClamp [[b]] [[v4f0]] [[v4f1]]
// CHECK-NEXT: OpStore %satb [[satb]]
satb = saturate(b);
// CHECK: [[c:%\d+]] = OpLoad %mat2v3float %c
// CHECK-NEXT: [[row0:%\d+]] = OpCompositeExtract %v3float [[c]] 0
// CHECK-NEXT: [[sat0:%\d+]] = OpExtInst %v3float [[glsl]] FClamp [[row0]] [[v3f0]] [[v3f1]]
// CHECK-NEXT: [[row1:%\d+]] = OpCompositeExtract %v3float [[c]] 1
// CHECK-NEXT: [[sat1:%\d+]] = OpExtInst %v3float [[glsl]] FClamp [[row1]] [[v3f0]] [[v3f1]]
// CHECK-NEXT: [[satc:%\d+]] = OpCompositeConstruct %mat2v3float [[sat0]] [[sat1]]
// CHECK-NEXT: OpStore %satc [[satc]]
satc = saturate(c);
}

Просмотреть файл

@ -0,0 +1,41 @@
// Run: %dxc -T vs_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float a, sina, cosa;
float4 b, sinb, cosb;
float2x3 c, sinc, cosc;
// CHECK: [[a0:%\d+]] = OpLoad %float %a
// CHECK-NEXT: [[sina:%\d+]] = OpExtInst %float [[glsl]] Sin [[a0]]
// CHECK-NEXT: OpStore %sina [[sina]]
// CHECK-NEXT: [[a1:%\d+]] = OpLoad %float %a
// CHECK-NEXT: [[cosa:%\d+]] = OpExtInst %float [[glsl]] Cos [[a1]]
// CHECK-NEXT: OpStore %cosa [[cosa]]
sincos(a, sina, cosa);
// CHECK: [[b0:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: [[sinb:%\d+]] = OpExtInst %v4float [[glsl]] Sin [[b0]]
// CHECK-NEXT: OpStore %sinb [[sinb]]
// CHECK-NEXT: [[b1:%\d+]] = OpLoad %v4float %b
// CHECK-NEXT: [[cosb:%\d+]] = OpExtInst %v4float [[glsl]] Cos [[b1]]
// CHECK-NEXT: OpStore %cosb [[cosb]]
sincos(b, sinb, cosb);
// CHECK: [[c0:%\d+]] = OpLoad %mat2v3float %c
// CHECK-NEXT: [[c0_row0:%\d+]] = OpCompositeExtract %v3float [[c0]] 0
// CHECK-NEXT: [[sinc_row0:%\d+]] = OpExtInst %v3float [[glsl]] Sin [[c0_row0]]
// CHECK-NEXT: [[c0_row1:%\d+]] = OpCompositeExtract %v3float [[c0]] 1
// CHECK-NEXT: [[sinc_row1:%\d+]] = OpExtInst %v3float [[glsl]] Sin [[c0_row1]]
// CHECK-NEXT: [[sinc:%\d+]] = OpCompositeConstruct %mat2v3float [[sinc_row0]] [[sinc_row1]]
// CHECK-NEXT: OpStore %sinc [[sinc]]
// CHECK-NEXT: [[c1:%\d+]] = OpLoad %mat2v3float %c
// CHECK-NEXT: [[c1_row0:%\d+]] = OpCompositeExtract %v3float [[c1]] 0
// CHECK-NEXT: [[cosc_row0:%\d+]] = OpExtInst %v3float [[glsl]] Cos [[c1_row0]]
// CHECK-NEXT: [[c1_row1:%\d+]] = OpCompositeExtract %v3float [[c1]] 1
// CHECK-NEXT: [[cosc_row1:%\d+]] = OpExtInst %v3float [[glsl]] Cos [[c1_row1]]
// CHECK-NEXT: [[cosc:%\d+]] = OpCompositeConstruct %mat2v3float [[cosc_row0]] [[cosc_row1]]
// CHECK-NEXT: OpStore %cosc [[cosc]]
sincos(c, sinc, cosc);
}

Просмотреть файл

@ -0,0 +1,35 @@
// Run: %dxc -T ps_6_0 -E main
// CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"
void main() {
float min, max, val;
float4 min4, max4, val4;
float2x3 min2x3, max2x3, val2x3;
// CHECK: [[min:%\d+]] = OpLoad %float %min
// CHECK-NEXT: [[max:%\d+]] = OpLoad %float %max
// CHECK-NEXT: [[val:%\d+]] = OpLoad %float %val
// CHECK-NEXT: {{%\d+}} = OpExtInst %float [[glsl]] SmoothStep [[min]] [[max]] [[val]]
float ss = smoothstep(min, max, val);
// CHECK: [[min4:%\d+]] = OpLoad %v4float %min4
// CHECK-NEXT: [[max4:%\d+]] = OpLoad %v4float %max4
// CHECK-NEXT: [[val4:%\d+]] = OpLoad %v4float %val4
// CHECK-NEXT: {{%\d+}} = OpExtInst %v4float [[glsl]] SmoothStep [[min4]] [[max4]] [[val4]]
float4 ss4 = smoothstep(min4, max4, val4);
// CHECK: [[min2x3:%\d+]] = OpLoad %mat2v3float %min2x3
// CHECK-NEXT: [[max2x3:%\d+]] = OpLoad %mat2v3float %max2x3
// CHECK-NEXT: [[val2x3:%\d+]] = OpLoad %mat2v3float %val2x3
// CHECK-NEXT: [[min_r0:%\d+]] = OpCompositeExtract %v3float [[min2x3]] 0
// CHECK-NEXT: [[max_r0:%\d+]] = OpCompositeExtract %v3float [[max2x3]] 0
// CHECK-NEXT: [[val_r0:%\d+]] = OpCompositeExtract %v3float [[val2x3]] 0
// CHECK-NEXT: [[ss_r0:%\d+]] = OpExtInst %v3float [[glsl]] SmoothStep [[min_r0]] [[max_r0]] [[val_r0]]
// CHECK-NEXT: [[min_r1:%\d+]] = OpCompositeExtract %v3float [[min2x3]] 1
// CHECK-NEXT: [[max_r1:%\d+]] = OpCompositeExtract %v3float [[max2x3]] 1
// CHECK-NEXT: [[val_r1:%\d+]] = OpCompositeExtract %v3float [[val2x3]] 1
// CHECK-NEXT: [[ss_r1:%\d+]] = OpExtInst %v3float [[glsl]] SmoothStep [[min_r1]] [[max_r1]] [[val_r1]]
// CHECK-NEXT: {{%\d+}} = OpCompositeConstruct %mat2v3float [[ss_r0]] [[ss_r1]]
float2x3 ss2x3 = smoothstep(min2x3, max2x3, val2x3);
}

Просмотреть файл

@ -0,0 +1,9 @@
// Run: %dxc -T ps_6_0 -E main
void main() {
float2x3 m = { {1,2,3} , {4,5,6} };
// CHECK: [[m:%\d+]] = OpLoad %mat2v3float %m
// CHECK-NEXT: {{%\d+}} = OpTranspose %mat3v2float [[m]]
float3x2 n = transpose(m);
}

Просмотреть файл

@ -442,13 +442,30 @@ TEST_F(FileTest, IntrinsicsCross) { runFileTest("intrinsics.cross.hlsl"); }
TEST_F(FileTest, IntrinsicsCeil) { runFileTest("intrinsics.ceil.hlsl"); }
TEST_F(FileTest, IntrinsicsClamp) { runFileTest("intrinsics.clamp.hlsl"); }
TEST_F(FileTest, IntrinsicsDegrees) { runFileTest("intrinsics.degrees.hlsl"); }
TEST_F(FileTest, IntrinsicsDistance) {
runFileTest("intrinsics.distance.hlsl");
}
TEST_F(FileTest, IntrinsicsRadians) { runFileTest("intrinsics.radians.hlsl"); }
TEST_F(FileTest, IntrinsicsDeterminant) {
runFileTest("intrinsics.determinant.hlsl");
}
TEST_F(FileTest, IntrinsicsExp) { runFileTest("intrinsics.exp.hlsl"); }
TEST_F(FileTest, IntrinsicsExp2) { runFileTest("intrinsics.exp2.hlsl"); }
TEST_F(FileTest, IntrinsicsFirstBitHigh) {
runFileTest("intrinsics.firstbithigh.hlsl");
}
TEST_F(FileTest, IntrinsicsFirstBitLow) {
runFileTest("intrinsics.firstbitlow.hlsl");
}
TEST_F(FileTest, IntrinsicsFloor) { runFileTest("intrinsics.floor.hlsl"); }
TEST_F(FileTest, IntrinsicsFma) { runFileTest("intrinsics.fma.hlsl"); }
TEST_F(FileTest, IntrinsicsFmod) { runFileTest("intrinsics.fmod.hlsl"); }
TEST_F(FileTest, IntrinsicsFrac) { runFileTest("intrinsics.frac.hlsl"); }
TEST_F(FileTest, IntrinsicsIsFinite) {
runFileTest("intrinsics.isfinite.hlsl");
}
TEST_F(FileTest, IntrinsicsIsInf) { runFileTest("intrinsics.isinf.hlsl"); }
TEST_F(FileTest, IntrinsicsIsNan) { runFileTest("intrinsics.isnan.hlsl"); }
TEST_F(FileTest, IntrinsicsLength) { runFileTest("intrinsics.length.hlsl"); }
TEST_F(FileTest, IntrinsicsLog) { runFileTest("intrinsics.log.hlsl"); }
TEST_F(FileTest, IntrinsicsLog2) { runFileTest("intrinsics.log2.hlsl"); }
@ -464,13 +481,24 @@ TEST_F(FileTest, IntrinsicsFloatSign) {
}
TEST_F(FileTest, IntrinsicsIntSign) { runFileTest("intrinsics.intsign.hlsl"); }
TEST_F(FileTest, IntrinsicsReflect) { runFileTest("intrinsics.reflect.hlsl"); }
TEST_F(FileTest, IntrinsicsRefract) { runFileTest("intrinsics.refract.hlsl"); }
TEST_F(FileTest, IntrinsicsSaturate) {
runFileTest("intrinsics.saturate.hlsl");
}
TEST_F(FileTest, IntrinsicsSmoothStep) {
runFileTest("intrinsics.smoothstep.hlsl");
}
TEST_F(FileTest, IntrinsicsStep) { runFileTest("intrinsics.step.hlsl"); }
TEST_F(FileTest, IntrinsicsSqrt) { runFileTest("intrinsics.sqrt.hlsl"); }
TEST_F(FileTest, IntrinsicsTranspose) {
runFileTest("intrinsics.transpose.hlsl");
}
TEST_F(FileTest, IntrinsicsTrunc) { runFileTest("intrinsics.trunc.hlsl"); }
// For intrinsic trigonometric functions
TEST_F(FileTest, IntrinsicsSin) { runFileTest("intrinsics.sin.hlsl"); }
TEST_F(FileTest, IntrinsicsCos) { runFileTest("intrinsics.cos.hlsl"); }
TEST_F(FileTest, IntrinsicsSinCos) { runFileTest("intrinsics.sincos.hlsl"); }
TEST_F(FileTest, IntrinsicsTan) { runFileTest("intrinsics.tan.hlsl"); }
TEST_F(FileTest, IntrinsicsSinh) { runFileTest("intrinsics.sinh.hlsl"); }
TEST_F(FileTest, IntrinsicsCosh) { runFileTest("intrinsics.cosh.hlsl"); }
@ -478,6 +506,7 @@ TEST_F(FileTest, IntrinsicsTanh) { runFileTest("intrinsics.tanh.hlsl"); }
TEST_F(FileTest, IntrinsicsAsin) { runFileTest("intrinsics.asin.hlsl"); }
TEST_F(FileTest, IntrinsicsAcos) { runFileTest("intrinsics.acos.hlsl"); }
TEST_F(FileTest, IntrinsicsAtan) { runFileTest("intrinsics.atan.hlsl"); }
TEST_F(FileTest, IntrinsicsAtan2) { runFileTest("intrinsics.atan2.hlsl"); }
// For attributes
TEST_F(FileTest, AttributeNumThreads) {