[spirv] Fix firstbithigh/low signedness (#5665)

The SPIR-V backend was incorrectly choosing to emit either FindSMsb or FindUMsb based on whether the result type of the call expression was signed or unsigned. Since the AST result type was always unsigned (and wrapped in a cast if necessary), this meant FindSMsb was never generated. FindSMsB should however be generated when the argument type is signed. This is indicated by the HLSL intrinsic op code (firstbithigh for signed and ufirstbithigh for unsigned). The SPIR-V generated now matches the DXIL generated. This bug was discovered while investigating #4702, so at the same time some error checking is added to make unimplemented cases explicit. Because the underlying GLSL.std.450 instructions FindSMsb, FindUMsb and FindILsB are currently limited to 32-bit width components, we now emit an error messages when this would otherwise result in invalid SPIR-V.
2023-09-08 09:09:34 -04:00 · 2023-09-08 09:09:34 -04:00 · b7a50ba6d7
--- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp
+++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp
@ -8313,25 +8313,6 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
                                           srcLoc, srcRange);                  \
  } break

-#define INTRINSIC_OP_CASE_SINT_UINT(intrinsicOp, glslSintOp, glslUintOp,       \
-                                    doEachVec)                                 \
-  case hlsl::IntrinsicOp::IOP_##intrinsicOp: {                                 \
-    glslOpcode = isSintType ? GLSLstd450::GLSLstd450##glslSintOp               \
-                            : GLSLstd450::GLSLstd450##glslUintOp;              \
-    retVal = processIntrinsicUsingGLSLInst(callExpr, glslOpcode, doEachVec,    \
-                                           srcLoc, srcRange);                  \
-  } break
-
-#define INTRINSIC_OP_CASE_SINT_UINT_FLOAT(intrinsicOp, glslSintOp, glslUintOp, \
-                                          glslFloatOp, doEachVec)              \
-  case hlsl::IntrinsicOp::IOP_##intrinsicOp: {                                 \
-    glslOpcode = isFloatType  ? GLSLstd450::GLSLstd450##glslFloatOp            \
-                 : isSintType ? GLSLstd450::GLSLstd450##glslSintOp             \
-                              : GLSLstd450::GLSLstd450##glslUintOp;            \
-    retVal = processIntrinsicUsingGLSLInst(callExpr, glslOpcode, doEachVec,    \
-                                           srcLoc, srcRange);                  \
-  } break
-
  switch (const auto hlslOpcode = static_cast<hlsl::IntrinsicOp>(opcode)) {
  case hlsl::IntrinsicOp::IOP_InterlockedAdd:
  case hlsl::IntrinsicOp::IOP_InterlockedAnd:
@ -8741,6 +8722,18 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
    retVal = processIntrinsicUsingGLSLInst(callExpr, glslOpcode, true, srcLoc,
                                           srcRange);
    break;
+  }
+  case hlsl::IntrinsicOp::IOP_ufirstbithigh: {
+    retVal = processIntrinsicFirstbit(callExpr, GLSLstd450::GLSLstd450FindUMsb);
+    break;
+  }
+  case hlsl::IntrinsicOp::IOP_firstbithigh: {
+    retVal = processIntrinsicFirstbit(callExpr, GLSLstd450::GLSLstd450FindSMsb);
+    break;
+  }
+  case hlsl::IntrinsicOp::IOP_firstbitlow: {
+    retVal = processIntrinsicFirstbit(callExpr, GLSLstd450::GLSLstd450FindILsb);
+    break;
  }
    INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true);
    INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false);
@ -8772,10 +8765,7 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
    INTRINSIC_OP_CASE(determinant, Determinant, false);
    INTRINSIC_OP_CASE(exp, Exp, true);
    INTRINSIC_OP_CASE(exp2, Exp2, true);
-    INTRINSIC_OP_CASE_SINT_UINT(firstbithigh, FindSMsb, FindUMsb, false);
-    INTRINSIC_OP_CASE_SINT_UINT(ufirstbithigh, FindSMsb, FindUMsb, false);
    INTRINSIC_OP_CASE(faceforward, FaceForward, false);
-    INTRINSIC_OP_CASE(firstbitlow, FindILsb, false);
    INTRINSIC_OP_CASE(floor, Floor, true);
    INTRINSIC_OP_CASE(fma, Fma, true);
    INTRINSIC_OP_CASE(frac, Fract, true);
@ -8813,6 +8803,26 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) {
  return retVal;
 }

+SpirvInstruction *
+SpirvEmitter::processIntrinsicFirstbit(const CallExpr *callExpr,
+                                       GLSLstd450 glslOpcode) {
+  const FunctionDecl *callee = callExpr->getDirectCallee();
+  const SourceLocation srcLoc = callExpr->getExprLoc();
+  const SourceRange srcRange = callExpr->getSourceRange();
+  const QualType argType = callExpr->getArg(0)->getType();
+
+  if (astContext.getTypeSize(argType) == 64) {
+    emitError("%0 is not yet implemented for 64-bit width components when "
+              "targetting SPIR-V",
+              srcLoc)
+        << getFunctionOrOperatorName(callee, true);
+    return nullptr;
+  }
+
+  return processIntrinsicUsingGLSLInst(callExpr, glslOpcode, false, srcLoc,
+                                       srcRange);
+}
+
 SpirvInstruction *
 SpirvEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
                                                hlsl::IntrinsicOp opcode) {
--- a/tools/clang/lib/SPIRV/SpirvEmitter.h
+++ b/tools/clang/lib/SPIRV/SpirvEmitter.h
@ -712,6 +712,10 @@ private:
  SpirvInstruction *processIntrinsicExecutionMode(const CallExpr *expr,
                                                  bool useIdParams);

+  /// Processes the 'firstbit{high|low}' intrinsic functions.
+  SpirvInstruction *processIntrinsicFirstbit(const CallExpr *,
+                                             GLSLstd450 glslOpcode);
+
 private:
  /// Returns the <result-id> for constant value 0 of the given type.
  SpirvConstant *getValueZero(QualType type);
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.firstbithigh.64bit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.firstbithigh.64bit.hlsl
@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 -E main
+
+void main() {
+  uint64_t   uint_1;
+  int fbh = firstbithigh(uint_1);
+}
+
+// CHECK: error: firstbithigh is not yet implemented for 64-bit width components when targetting SPIR-V
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.firstbithigh.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.firstbithigh.hlsl
@ -1,10 +1,5 @@
 // RUN: %dxc -T ps_6_0 -E main

-// Note: Even though the HLSL documentation contains a version of "firstbithigh" that 
-// takes signed integer(s) and returns signed integer(s), the frontend always generates
-// the AST using the overloaded version that takes unsigned integer(s) and returns
-// unsigned integer(s). Therefore "FindSMsb" is not generated in any case below.
-
 // CHECK: [[glsl:%\d+]] = OpExtInstImport "GLSL.std.450"

 void main() {
@ -13,15 +8,25 @@ void main() {
  uint  uint_1;
  uint4 uint_4;

-// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindUMsb {{%\d+}}
+// CHECK: [[sint_1:%\d+]] = OpLoad %int %sint_1
+// CHECK:    [[msb:%\d+]] = OpExtInst %uint [[glsl]] FindSMsb [[sint_1]]
+// CHECK:    [[res:%\d+]] = OpBitcast %int [[msb]]
+// CHECK:                   OpStore %fbh [[res]]
  int fbh = firstbithigh(sint_1);

-// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindUMsb {{%\d+}}
+// CHECK: [[sint_4:%\d+]] = OpLoad %v4int %sint_4
+// CHECK:    [[msb:%\d+]] = OpExtInst %v4uint [[glsl]] FindSMsb [[sint_4]]
+// CHECK:    [[res:%\d+]] = OpBitcast %v4int [[msb]]
+// CHECK:                   OpStore %fbh4 [[res]]
  int4 fbh4 = firstbithigh(sint_4);

-// CHECK: {{%\d+}} = OpExtInst %uint [[glsl]] FindUMsb {{%\d+}}
+// CHECK: [[uint_1:%\d+]] = OpLoad %uint %uint_1
+// CHECK:    [[msb:%\d+]] = OpExtInst %uint [[glsl]] FindUMsb [[uint_1]]
+// CHECK:                   OpStore %ufbh [[msb]]
  uint ufbh = firstbithigh(uint_1);

-// CHECK: {{%\d+}} = OpExtInst %v4uint [[glsl]] FindUMsb {{%\d+}}
+// CHECK: [[uint_4:%\d+]] = OpLoad %v4uint %uint_4
+// CHECK:    [[msb:%\d+]] = OpExtInst %v4uint [[glsl]] FindUMsb [[uint_4]]
+// CHECK:                   OpStore %ufbh4 [[msb]]
  uint4 ufbh4 = firstbithigh(uint_4);
 }
--- a/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl
+++ b/tools/clang/test/CodeGenSPIRV/intrinsics.firstbitlow.64bit.hlsl
@ -0,0 +1,8 @@
+// RUN: %dxc -T ps_6_0 -E main
+
+void main() {
+  uint64_t   uint_1;
+  int fbl = firstbitlow(uint_1);
+}
+
+// CHECK: error: firstbitlow is not yet implemented for 64-bit width components when targetting SPIR-V
--- a/tools/clang/test/CodeGenSPIRV_Lit/shader.debug.line.intrinsic.hlsl
+++ b/tools/clang/test/CodeGenSPIRV_Lit/shader.debug.line.intrinsic.hlsl
@ -176,7 +176,7 @@ void main() {
 // CHECK-NEXT:                DebugLine [[src]] %uint_180 %uint_180 %uint_20 %uint_43
 // CHECK-NEXT:                OpExtInst %float {{%[0-9]+}} Sqrt [[abs]]
 // CHECK:      DebugLine [[src]] %uint_180 %uint_180 %uint_7 %uint_52
-// CHECK-NEXT: OpExtInst %uint {{%[0-9]+}} FindUMsb
+// CHECK-NEXT: OpExtInst %uint {{%[0-9]+}} FindSMsb
  max(firstbithigh(sqrt(abs(v2f.x * v4f.w)) + v4i.x),
 // CHECK:      DebugLine [[src]] %uint_183 %uint_183 %uint_7 %uint_16
 // CHECK-NEXT: OpExtInst %float {{%[0-9]+}} Cos
--- a/tools/clang/test/CodeGenSPIRV_Lit/spirv.debug.opline.intrinsic.hlsl
+++ b/tools/clang/test/CodeGenSPIRV_Lit/spirv.debug.opline.intrinsic.hlsl
@ -176,7 +176,7 @@ void main() {
 // CHECK-NEXT:                OpLine [[file]] 180 20
 // CHECK-NEXT:                OpExtInst %float {{%[0-9]+}} Sqrt [[abs]]
 // CHECK:      OpLine [[file]] 180 7
-// CHECK-NEXT: OpExtInst %uint {{%[0-9]+}} FindUMsb
+// CHECK-NEXT: OpExtInst %uint {{%[0-9]+}} FindSMsb
  max(firstbithigh(sqrt(abs(v2f.x * v4f.w)) + v4i.x),
 // CHECK:      OpLine [[file]] 183 7
 // CHECK-NEXT: OpExtInst %float {{%[0-9]+}} Cos
--- a/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp
+++ b/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp
@ -1060,9 +1060,15 @@ TEST_F(FileTest, IntrinsicsFaceForward) {
 TEST_F(FileTest, IntrinsicsFirstBitHigh) {
  runFileTest("intrinsics.firstbithigh.hlsl");
 }
+TEST_F(FileTest, IntrinsicsFirstBitHigh64bit) {
+  runFileTest("intrinsics.firstbithigh.64bit.hlsl", Expect::Failure);
+}
 TEST_F(FileTest, IntrinsicsFirstBitLow) {
  runFileTest("intrinsics.firstbitlow.hlsl");
 }
+TEST_F(FileTest, IntrinsicsFirstBitLow64bit) {
+  runFileTest("intrinsics.firstbitlow.64bit.hlsl", Expect::Failure);
+}
 TEST_F(FileTest, IntrinsicsPrintf) { runFileTest("intrinsics.printf.hlsl"); }
 TEST_F(FileTest, IntrinsicsFloor) { runFileTest("intrinsics.floor.hlsl"); }
 TEST_F(FileTest, IntrinsicsFmod) { runFileTest("intrinsics.fmod.hlsl"); }