[SPIR-V] Remove bitcast from float->int conversion (#6533)

OpConvertFToU and OpConvertFToS convert numerically from floating point to integer, with rounding toward 0.0. This matches the defined behavior of HLSL without any need for an initial bitwidth conversion from floating point to floating point, which can result in incorrect rounding behavior (see #6501). Note that behavior is undefined if the target type is not wide enough to hold the converted value. However, this was also true with the initial bitwidth conversion (because an N-bit FP can hold values outside the range of an N-bit int), and I can't come up with any case where an initial truncation would result in correct defined behavior where this straight conversion would be undefined, since the precision will inevitably be lost anyway. Fixes #6501.
2024-04-18 09:31:45 -04:00 · 2024-04-18 09:31:45 -04:00 · 9ee056e597
--- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp
@ -8233,17 +8233,14 @@ SpirvInstruction *SpirvEmitter::castToInt(SpirvInstruction *fromVal,
  }

  if (isFloatOrVecOfFloatType(fromType)) {
-    // First convert the source to the bitwidth of the destination if necessary.
-    fromVal = convertBitwidth(fromVal, srcLoc, fromType, toIntType, nullptr,
-                              srcRange);
    if (isSintOrVecOfSintType(toIntType)) {
      return spvBuilder.createUnaryOp(spv::Op::OpConvertFToS, toIntType,
                                      fromVal, srcLoc, srcRange);
-    } else if (isUintOrVecOfUintType(toIntType)) {
+    }
+
+    if (isUintOrVecOfUintType(toIntType)) {
      return spvBuilder.createUnaryOp(spv::Op::OpConvertFToU, toIntType,
                                      fromVal, srcLoc, srcRange);
-    } else {
-      emitError("casting from floating point to integer unimplemented", srcLoc);
    }
  }

@ -8310,6 +8307,7 @@ SpirvInstruction *SpirvEmitter::castToInt(SpirvInstruction *fromVal,
    return result;
  }

+  emitError("casting from given type to integer unimplemented", srcLoc);
  return nullptr;
 }

--- a/tools/clang/test/CodeGenSPIRV/cast.bitwidth.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.bitwidth.hlsl
@ -41,8 +41,7 @@ void main() {
  // 32-bit float to various 64-bit types.
  float aaa;
 // CHECK:             [[aaa:%[0-9]+]] = OpLoad %float %aaa
-// CHECK-NEXT: [[aaa_double:%[0-9]+]] = OpFConvert %double [[aaa]]
-// CHECK-NEXT:  [[aaa_ulong:%[0-9]+]] = OpConvertFToU %ulong [[aaa_double]]
+// CHECK-NEXT:  [[aaa_ulong:%[0-9]+]] = OpConvertFToU %ulong [[aaa]]
 // CHECK-NEXT:                       OpStore %bbb [[aaa_ulong]]
  uint64_t bbb = aaa;
 // CHECK:             [[aaa_0:%[0-9]+]] = OpLoad %float %aaa
@ -50,8 +49,7 @@ void main() {
 // CHECK-NEXT:                       OpStore %ccc [[aaa_double_0]]
  double   ccc = aaa;
 // CHECK:             [[aaa_1:%[0-9]+]] = OpLoad %float %aaa
-// CHECK-NEXT: [[aaa_double_1:%[0-9]+]] = OpFConvert %double [[aaa_1]]
-// CHECK-NEXT:   [[aaa_long:%[0-9]+]] = OpConvertFToS %long [[aaa_double_1]]
+// CHECK-NEXT:   [[aaa_long:%[0-9]+]] = OpConvertFToS %long [[aaa_1]]
 // CHECK-NEXT:                       OpStore %ddd [[aaa_long]]
  int64_t  ddd = aaa;

@ -95,8 +93,7 @@ void main() {
  // 64-bit float to various 32-bit types.
  double eee;
 // CHECK:         [[e64_2:%[0-9]+]] = OpLoad %double %eee
-// CHECK-NEXT:    [[e32_2:%[0-9]+]] = OpFConvert %float [[e64_2]]
-// CHECK-NEXT: [[e_uint_0:%[0-9]+]] = OpConvertFToU %uint [[e32_2]]
+// CHECK-NEXT: [[e_uint_0:%[0-9]+]] = OpConvertFToU %uint [[e64_2]]
 // CHECK-NEXT:                   OpStore %fff [[e_uint_0]]
  uint  fff = eee;
 // CHECK:              [[e_2:%[0-9]+]] = OpLoad %double %eee
@ -104,8 +101,7 @@ void main() {
 // CHECK-NEXT:                      OpStore %ggg [[e_float_1]]
  float ggg = eee;
 // CHECK:            [[e_3:%[0-9]+]] = OpLoad %double %eee
-// CHECK-NEXT: [[e_float_2:%[0-9]+]] = OpFConvert %float [[e_3]]
-// CHECK-NEXT:   [[e_int_3:%[0-9]+]] = OpConvertFToS %int [[e_float_2]]
+// CHECK-NEXT:   [[e_int_3:%[0-9]+]] = OpConvertFToS %int [[e_3]]
 // CHECK-NEXT:                    OpStore %hhh [[e_int_3]]
  int   hhh = eee;

@ -113,13 +109,11 @@ void main() {
  // Vector case: 64-bit float to various 32-bit types.
  double2 i;
 // CHECK:      [[i_double:%[0-9]+]] = OpLoad %v2double %i
-// CHECK-NEXT:  [[i_float:%[0-9]+]] = OpFConvert %v2float [[i_double]]
-// CHECK-NEXT:   [[i_uint:%[0-9]+]] = OpConvertFToU %v2uint [[i_float]]
+// CHECK-NEXT:   [[i_uint:%[0-9]+]] = OpConvertFToU %v2uint [[i_double]]
 // CHECK-NEXT:                     OpStore %j [[i_uint]]
  uint2   j = i;
 // CHECK:      [[i_double_0:%[0-9]+]] = OpLoad %v2double %i
-// CHECK-NEXT:  [[i_float_0:%[0-9]+]] = OpFConvert %v2float [[i_double_0]]
-// CHECK-NEXT:    [[i_int:%[0-9]+]] = OpConvertFToS %v2int [[i_float_0]]
+// CHECK-NEXT:    [[i_int:%[0-9]+]] = OpConvertFToS %v2int [[i_double_0]]
 // CHECK-NEXT:                     OpStore %k [[i_int]]
  int2    k = i;
 // CHECK:      [[i_double_1:%[0-9]+]] = OpLoad %v2double %i
--- a/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.literal-initializer.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.flat-conversion.literal-initializer.hlsl
@ -26,10 +26,8 @@ void main() {
 // CHECK-NEXT:  [[inf_double:%[0-9]+]] = OpFConvert %double [[inf]]
 // CHECK-NEXT: [[inf2_double:%[0-9]+]] = OpCompositeConstruct %v2double [[inf_double]] [[inf_double]] 
 // CHECK-NEXT:  [[inf_double_0:%[0-9]+]] = OpFConvert %double [[inf]]
-// CHECK-NEXT: [[inf_double_:%[0-9]+]] = OpFConvert %double [[inf]]
-// CHECK-NEXT:   [[inf_int64:%[0-9]+]] = OpConvertFToS %long [[inf_double_]]
-// CHECK-NEXT: [[inf_double__0:%[0-9]+]] = OpFConvert %double [[inf]]
-// CHECK-NEXT:  [[inf_uint64:%[0-9]+]] = OpConvertFToU %ulong [[inf_double__0]]
+// CHECK-NEXT:   [[inf_int64:%[0-9]+]] = OpConvertFToS %long [[inf]]
+// CHECK-NEXT:  [[inf_uint64:%[0-9]+]] = OpConvertFToU %ulong [[inf]]
 // CHECK-NEXT:             {{%[0-9]+}} = OpCompositeConstruct %S [[inf2]] [[inf]] [[inf2_double]] [[inf_double_0]] [[inf_int64]] [[inf_uint64]]
  S s3 = (S)(1.0 / 0.0);

@ -38,10 +36,8 @@ void main() {
 // CHECK-NEXT:  [[b_double:%[0-9]+]] = OpFConvert %double [[b]]
 // CHECK-NEXT: [[b2_double:%[0-9]+]] = OpCompositeConstruct %v2double [[b_double]] [[b_double]]
 // CHECK-NEXT:  [[b_double_0:%[0-9]+]] = OpFConvert %double [[b]]
-// CHECK-NEXT: [[b_double_:%[0-9]+]] = OpFConvert %double [[b]]
-// CHECK-NEXT:   [[b_int64:%[0-9]+]] = OpConvertFToS %long [[b_double_]]
-// CHECK-NEXT: [[b_double__0:%[0-9]+]] = OpFConvert %double [[b]]
-// CHECK-NEXT:  [[b_uint64:%[0-9]+]] = OpConvertFToU %ulong [[b_double__0]]
+// CHECK-NEXT:   [[b_int64:%[0-9]+]] = OpConvertFToS %long [[b]]
+// CHECK-NEXT:  [[b_uint64:%[0-9]+]] = OpConvertFToU %ulong [[b]]
 // CHECK-NEXT:           {{%[0-9]+}} = OpCompositeConstruct %S [[b2_float]] [[b]] [[b2_double]] [[b_double_0]] [[b_int64]] [[b_uint64]]
  float b;
  S s2 = (S)(b);
--- a/tools/clang/test/CodeGenSPIRV/cast.struct-to-int.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/cast.struct-to-int.hlsl
@ -78,7 +78,6 @@ void main()
 // CHECK-DAG: [[U255:%[^ ]*]] = OpConstant [[UINT]] 255
 // CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3
 // CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0
-// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64

    buf[0] = (uint) colors;
 // CHECK: [[COLORS:%[^ ]*]] = OpLoad [[TWOCOLORS]]
@ -106,8 +105,7 @@ void main()
    lbuf[0] += (uint64_t) m;
 // CHECK: [[MIX:%[^ ]*]] = OpLoad [[MIXED]]
 // CHECK: [[MIX0:%[^ ]*]] = OpCompositeExtract [[FLOAT]] [[MIX]] 0
-// CHECK: [[V1_1:%[^ ]*]] = OpFConvert [[DOUBLE]] [[MIX0]]
-// CHECK: [[V2_0:%[^ ]*]] = OpConvertFToU [[ULONG]] [[V1_1]]
+// CHECK: [[V2_0:%[^ ]*]] = OpConvertFToU [[ULONG]] [[MIX0]]
 // CHECK: [[LBUF00_0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[LBUF]] [[I0]] [[U0]]
 // CHECK: [[V3:%[^ ]*]] = OpLoad [[ULONG]] [[LBUF00_0]]
 // CHECK: [[V4:%[^ ]*]] = OpIAdd [[ULONG]] [[V3]] [[V2_0]]