Bug 1763054 - Make ARM64 loadConstantDouble/Float32 the source of truth. r=nbp

Remove local decisions about how to load FP constants and centralize that knowledge in loadConstantDouble / loadConstantFloat32. Code that needs to load, e.g., -0.0 can just pass that value to those functions and expect optimal code. Add a note in those functions that the optimal way of loading negative zero is to load it PC-relative from memory, not to construct it with a multi-instruction sequence. Differential Revision: https://phabricator.services.mozilla.com/D142917
2022-04-05 09:55:01 +00:00 · 2022-04-05 09:55:01 +00:00 · 8195e9adaa
--- a/js/src/jit/arm64/CodeGenerator-arm64.cpp
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@ -1399,13 +1399,13 @@ void CodeGenerator::visitUnbox(LUnbox* unbox) {
 }

 void CodeGenerator::visitDouble(LDouble* ins) {
-  ARMFPRegister output(ToFloatRegister(ins->getDef(0)), 64);
-  masm.Fmov(output, ins->value());
+  const LDefinition* out = ins->getDef(0);
+  masm.loadConstantDouble(ins->value(), ToFloatRegister(out));
 }

 void CodeGenerator::visitFloat32(LFloat32* ins) {
-  ARMFPRegister output(ToFloatRegister(ins->getDef(0)), 32);
-  masm.Fmov(output, ins->value());
+  const LDefinition* out = ins->getDef(0);
+  masm.loadConstantFloat32(ins->value(), ToFloatRegister(out));
 }

 void CodeGenerator::visitTestDAndBranch(LTestDAndBranch* test) {
--- a/js/src/jit/arm64/MacroAssembler-arm64.cpp
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@ -3112,9 +3112,8 @@ void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister output) {
  ScratchDoubleScope scratch(*this);

-  // Double with only the sign bit set (= negative zero).
-  loadConstantDouble(0, scratch);
-  negateDouble(scratch);
+  // Double with only the sign bit set
+  loadConstantDouble(-0.0, scratch);

  if (lhs != output) {
    moveDouble(lhs, output);
@ -3129,9 +3128,8 @@ void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
  ScratchFloat32Scope scratch(*this);

-  // Float with only the sign bit set (= negative zero).
-  loadConstantFloat32(0, scratch);
-  negateFloat(scratch);
+  // Float with only the sign bit set
+  loadConstantFloat32(-0.0f, scratch);

  if (lhs != output) {
    moveFloat32(lhs, output);
--- a/js/src/jit/arm64/MacroAssembler-arm64.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@ -1585,9 +1585,13 @@ class MacroAssemblerCompat : public vixl::MacroAssembler {
  }

  void loadConstantDouble(double d, FloatRegister dest) {
+    // Note, for -0.0 this will turn into a pc-relative load from memory, not a
+    // `fmov + fneg` sequence.  The former is believed to be better, as gcc,
+    // clang, and vixl all choose it.
    Fmov(ARMFPRegister(dest, 64), d);
  }
  void loadConstantFloat32(float f, FloatRegister dest) {
+    // See comment in loadConstantDouble.
    Fmov(ARMFPRegister(dest, 32), f);
  }