[SPIR-V] Remove bitcast from float->int conversion (#6533)

OpConvertFToU and FToS convert numerically from floating point to
integer, with rounding toward 0.0, matching the defined behavior of HLSL
without any need for an initial bitwidth conversion from floating point
to floating point, which can result in incorrect rounding behavior (see
#6501).

Note that behavior is undefined if the target type is not wide enough to
hold the converted value. However, this was also true with the initial
bitwidth conversion (because an N-bit FP can hold values outside the range
of an N-bit int), and I can't come up with any case where an initial
truncation would result in correct defined behavior where this straight
conversion would be undefined, since the precision will inevitably be lost
anyway.

Fixes #6501
This commit is contained in:
Natalie Chouinard 2024-04-18 09:31:45 -04:00 коммит произвёл GitHub
Родитель dda80a98c4
Коммит 9ee056e597
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
4 изменённых файлов: 15 добавлений и 29 удалений

Просмотреть файл

@ -8233,17 +8233,14 @@ SpirvInstruction *SpirvEmitter::castToInt(SpirvInstruction *fromVal,
}
if (isFloatOrVecOfFloatType(fromType)) {
// First convert the source to the bitwidth of the destination if necessary.
fromVal = convertBitwidth(fromVal, srcLoc, fromType, toIntType, nullptr,
srcRange);
if (isSintOrVecOfSintType(toIntType)) {
return spvBuilder.createUnaryOp(spv::Op::OpConvertFToS, toIntType,
fromVal, srcLoc, srcRange);
} else if (isUintOrVecOfUintType(toIntType)) {
}
if (isUintOrVecOfUintType(toIntType)) {
return spvBuilder.createUnaryOp(spv::Op::OpConvertFToU, toIntType,
fromVal, srcLoc, srcRange);
} else {
emitError("casting from floating point to integer unimplemented", srcLoc);
}
}
@ -8310,6 +8307,7 @@ SpirvInstruction *SpirvEmitter::castToInt(SpirvInstruction *fromVal,
return result;
}
emitError("casting from given type to integer unimplemented", srcLoc);
return nullptr;
}

Просмотреть файл

@ -41,8 +41,7 @@ void main() {
// 32-bit float to various 64-bit types.
float aaa;
// CHECK: [[aaa:%[0-9]+]] = OpLoad %float %aaa
// CHECK-NEXT: [[aaa_double:%[0-9]+]] = OpFConvert %double [[aaa]]
// CHECK-NEXT: [[aaa_ulong:%[0-9]+]] = OpConvertFToU %ulong [[aaa_double]]
// CHECK-NEXT: [[aaa_ulong:%[0-9]+]] = OpConvertFToU %ulong [[aaa]]
// CHECK-NEXT: OpStore %bbb [[aaa_ulong]]
uint64_t bbb = aaa;
// CHECK: [[aaa_0:%[0-9]+]] = OpLoad %float %aaa
@ -50,8 +49,7 @@ void main() {
// CHECK-NEXT: OpStore %ccc [[aaa_double_0]]
double ccc = aaa;
// CHECK: [[aaa_1:%[0-9]+]] = OpLoad %float %aaa
// CHECK-NEXT: [[aaa_double_1:%[0-9]+]] = OpFConvert %double [[aaa_1]]
// CHECK-NEXT: [[aaa_long:%[0-9]+]] = OpConvertFToS %long [[aaa_double_1]]
// CHECK-NEXT: [[aaa_long:%[0-9]+]] = OpConvertFToS %long [[aaa_1]]
// CHECK-NEXT: OpStore %ddd [[aaa_long]]
int64_t ddd = aaa;
@ -95,8 +93,7 @@ void main() {
// 64-bit float to various 32-bit types.
double eee;
// CHECK: [[e64_2:%[0-9]+]] = OpLoad %double %eee
// CHECK-NEXT: [[e32_2:%[0-9]+]] = OpFConvert %float [[e64_2]]
// CHECK-NEXT: [[e_uint_0:%[0-9]+]] = OpConvertFToU %uint [[e32_2]]
// CHECK-NEXT: [[e_uint_0:%[0-9]+]] = OpConvertFToU %uint [[e64_2]]
// CHECK-NEXT: OpStore %fff [[e_uint_0]]
uint fff = eee;
// CHECK: [[e_2:%[0-9]+]] = OpLoad %double %eee
@ -104,8 +101,7 @@ void main() {
// CHECK-NEXT: OpStore %ggg [[e_float_1]]
float ggg = eee;
// CHECK: [[e_3:%[0-9]+]] = OpLoad %double %eee
// CHECK-NEXT: [[e_float_2:%[0-9]+]] = OpFConvert %float [[e_3]]
// CHECK-NEXT: [[e_int_3:%[0-9]+]] = OpConvertFToS %int [[e_float_2]]
// CHECK-NEXT: [[e_int_3:%[0-9]+]] = OpConvertFToS %int [[e_3]]
// CHECK-NEXT: OpStore %hhh [[e_int_3]]
int hhh = eee;
@ -113,13 +109,11 @@ void main() {
// Vector case: 64-bit float to various 32-bit types.
double2 i;
// CHECK: [[i_double:%[0-9]+]] = OpLoad %v2double %i
// CHECK-NEXT: [[i_float:%[0-9]+]] = OpFConvert %v2float [[i_double]]
// CHECK-NEXT: [[i_uint:%[0-9]+]] = OpConvertFToU %v2uint [[i_float]]
// CHECK-NEXT: [[i_uint:%[0-9]+]] = OpConvertFToU %v2uint [[i_double]]
// CHECK-NEXT: OpStore %j [[i_uint]]
uint2 j = i;
// CHECK: [[i_double_0:%[0-9]+]] = OpLoad %v2double %i
// CHECK-NEXT: [[i_float_0:%[0-9]+]] = OpFConvert %v2float [[i_double_0]]
// CHECK-NEXT: [[i_int:%[0-9]+]] = OpConvertFToS %v2int [[i_float_0]]
// CHECK-NEXT: [[i_int:%[0-9]+]] = OpConvertFToS %v2int [[i_double_0]]
// CHECK-NEXT: OpStore %k [[i_int]]
int2 k = i;
// CHECK: [[i_double_1:%[0-9]+]] = OpLoad %v2double %i

Просмотреть файл

@ -26,10 +26,8 @@ void main() {
// CHECK-NEXT: [[inf_double:%[0-9]+]] = OpFConvert %double [[inf]]
// CHECK-NEXT: [[inf2_double:%[0-9]+]] = OpCompositeConstruct %v2double [[inf_double]] [[inf_double]]
// CHECK-NEXT: [[inf_double_0:%[0-9]+]] = OpFConvert %double [[inf]]
// CHECK-NEXT: [[inf_double_:%[0-9]+]] = OpFConvert %double [[inf]]
// CHECK-NEXT: [[inf_int64:%[0-9]+]] = OpConvertFToS %long [[inf_double_]]
// CHECK-NEXT: [[inf_double__0:%[0-9]+]] = OpFConvert %double [[inf]]
// CHECK-NEXT: [[inf_uint64:%[0-9]+]] = OpConvertFToU %ulong [[inf_double__0]]
// CHECK-NEXT: [[inf_int64:%[0-9]+]] = OpConvertFToS %long [[inf]]
// CHECK-NEXT: [[inf_uint64:%[0-9]+]] = OpConvertFToU %ulong [[inf]]
// CHECK-NEXT: {{%[0-9]+}} = OpCompositeConstruct %S [[inf2]] [[inf]] [[inf2_double]] [[inf_double_0]] [[inf_int64]] [[inf_uint64]]
S s3 = (S)(1.0 / 0.0);
@ -38,10 +36,8 @@ void main() {
// CHECK-NEXT: [[b_double:%[0-9]+]] = OpFConvert %double [[b]]
// CHECK-NEXT: [[b2_double:%[0-9]+]] = OpCompositeConstruct %v2double [[b_double]] [[b_double]]
// CHECK-NEXT: [[b_double_0:%[0-9]+]] = OpFConvert %double [[b]]
// CHECK-NEXT: [[b_double_:%[0-9]+]] = OpFConvert %double [[b]]
// CHECK-NEXT: [[b_int64:%[0-9]+]] = OpConvertFToS %long [[b_double_]]
// CHECK-NEXT: [[b_double__0:%[0-9]+]] = OpFConvert %double [[b]]
// CHECK-NEXT: [[b_uint64:%[0-9]+]] = OpConvertFToU %ulong [[b_double__0]]
// CHECK-NEXT: [[b_int64:%[0-9]+]] = OpConvertFToS %long [[b]]
// CHECK-NEXT: [[b_uint64:%[0-9]+]] = OpConvertFToU %ulong [[b]]
// CHECK-NEXT: {{%[0-9]+}} = OpCompositeConstruct %S [[b2_float]] [[b]] [[b2_double]] [[b_double_0]] [[b_int64]] [[b_uint64]]
float b;
S s2 = (S)(b);

Просмотреть файл

@ -78,7 +78,6 @@ void main()
// CHECK-DAG: [[U255:%[^ ]*]] = OpConstant [[UINT]] 255
// CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3
// CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0
// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64
buf[0] = (uint) colors;
// CHECK: [[COLORS:%[^ ]*]] = OpLoad [[TWOCOLORS]]
@ -106,8 +105,7 @@ void main()
lbuf[0] += (uint64_t) m;
// CHECK: [[MIX:%[^ ]*]] = OpLoad [[MIXED]]
// CHECK: [[MIX0:%[^ ]*]] = OpCompositeExtract [[FLOAT]] [[MIX]] 0
// CHECK: [[V1_1:%[^ ]*]] = OpFConvert [[DOUBLE]] [[MIX0]]
// CHECK: [[V2_0:%[^ ]*]] = OpConvertFToU [[ULONG]] [[V1_1]]
// CHECK: [[V2_0:%[^ ]*]] = OpConvertFToU [[ULONG]] [[MIX0]]
// CHECK: [[LBUF00_0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[LBUF]] [[I0]] [[U0]]
// CHECK: [[V3:%[^ ]*]] = OpLoad [[ULONG]] [[LBUF00_0]]
// CHECK: [[V4:%[^ ]*]] = OpIAdd [[ULONG]] [[V3]] [[V2_0]]