Bug 1523568 - ARM64: Implement divisions by a constant. r=sstangl

Differential Revision: https://phabricator.services.mozilla.com/D31805 --HG-- extra : moz-landing-system : lando
2019-05-27 16:36:26 +00:00 · 2019-05-27 16:36:26 +00:00 · 85124b1e85
--- a/js/src/jit-test/tests/ion/idiv-by-constant.js
+++ b/js/src/jit-test/tests/ion/idiv-by-constant.js
@ -0,0 +1,108 @@
+// Returns an array of `count` pseudo-random Int32 values. The generator is a
+// shift/xor sequence (shifts 13, 17, 5) started from a fixed seed, so every
+// run of the test sees the same inputs.
+function int_seq(count) {
+    var arr = [];
+    var x = 0xfac83126;
+    while (count--) {
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        // Coerce to Int32 before storing.
+        arr.push(x | 0);
+    }
+    return arr;
+}
+
+// Calls `int`, `asm` and `ion` on each of the 10000 values from int_seq and
+// asserts that `asm` and `ion` return the same result as `int`, which acts as
+// the reference. `name` is only used by the commented-out debug log.
+function test(name, asm, ion, int) {
+    let count = 10000;
+    let seq = int_seq(count);
+    for (let x of seq) {
+        let rint = int(x);
+        let rasm = asm(x);
+        let rion = ion(x);
+        // console.log(name, x, rint, rasm, rion);
+        assertEq(rasm, rint);
+        assertEq(rion, rint);
+    }
+}
+
+// asm.js module; the exported f(x) computes the signed Int32 quotient x / 2,
+// truncated with |0.
+var asmdiv2 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x|0) / 2)|0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the signed x / 2 computation; passed to test()
+// as its `ion` argument.
+var plaindiv2 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 2)|0;
+    return z|0;
+}
+
+// Reference version of the signed x / 2 computation; passed to test() as its
+// `int` argument.
+var interpdiv2 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 2)|0;
+    return z|0;
+}
+
+test("div2", asmdiv2, plaindiv2, interpdiv2);
+
+// asm.js module; the exported f(x) computes the signed Int32 quotient x / 3,
+// truncated with |0.
+var asmdiv3 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x|0) / 3)|0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the signed x / 3 computation; passed to test()
+// as its `ion` argument.
+var plaindiv3 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 3)|0;
+    return z|0;
+}
+
+// Reference version of the signed x / 3 computation; passed to test() as its
+// `int` argument.
+var interpdiv3 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 3)|0;
+    return z|0;
+}
+
+test("div3", asmdiv3, plaindiv3, interpdiv3);
+
+// asm.js module; the exported f(x) computes the signed Int32 quotient x / 7,
+// truncated with |0.
+var asmdiv7 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x|0) / 7)|0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the signed x / 7 computation; passed to test()
+// as its `ion` argument.
+var plaindiv7 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 7)|0;
+    return z|0;
+}
+
+// Reference version of the signed x / 7 computation; passed to test() as its
+// `int` argument.
+var interpdiv7 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x|0) / 7)|0;
+    return z|0;
+}
+
+test("div7", asmdiv7, plaindiv7, interpdiv7);
--- a/js/src/jit-test/tests/ion/udiv-by-constant.js
+++ b/js/src/jit-test/tests/ion/udiv-by-constant.js
@ -0,0 +1,114 @@
+function uint_seq(count) {
+    with({}){}
+    var arr = [];
+    var x = 0xfac83126;
+    while (count--) {
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        // SpiderMonkey does not know how to represent UInt32, only Int32, and
+        // including any UInt32 will cause the following function to be
+        // de-optimized as double math.
+        if (x|0 > 0)
+            arr.push(x|0);
+    }
+    return arr;
+}
+
+// Calls `int`, `asm` and `ion` on each value returned by uint_seq(10000) and
+// asserts that `asm` and `ion` return the same result as `int`, which acts as
+// the reference. `name` is only used by the commented-out debug log.
+function test(name, asm, ion, int) {
+    with({}){}
+    let count = 10000;
+    let seq = uint_seq(count);
+    for (let x of seq) {
+        let rint = int(x);
+        let rasm = asm(x);
+        let rion = ion(x);
+        // console.log(name, x, rint, rasm, rion);
+        assertEq(rasm, rint);
+        assertEq(rion, rint);
+    }
+}
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 2 and returns the quotient coerced with |0.
+var asmdiv2 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 2)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 2 computation; passed to test()
+// as its `ion` argument.
+var plaindiv2 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 2)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 2 computation; passed to test() as
+// its `int` argument.
+var interpdiv2 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 2)>>>0;
+    return z|0;
+}
+
+test("div2", asmdiv2, plaindiv2, interpdiv2);
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 5 and returns the quotient coerced with |0.
+var asmdiv5 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 5)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 5 computation; passed to test()
+// as its `ion` argument.
+var plaindiv5 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 5)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 5 computation; passed to test() as
+// its `int` argument.
+var interpdiv5 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 5)>>>0;
+    return z|0;
+}
+
+test("div5", asmdiv5, plaindiv5, interpdiv5);
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 7 and returns the quotient coerced with |0.
+var asmdiv7 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 7)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 7 computation; passed to test()
+// as its `ion` argument.
+var plaindiv7 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 7)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 7 computation; passed to test() as
+// its `int` argument.
+var interpdiv7 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 7)>>>0;
+    return z|0;
+}
+
+test("div7", asmdiv7, plaindiv7, interpdiv7);
--- a/js/src/jit-test/tests/ion/udiv-by-u32-constant.js
+++ b/js/src/jit-test/tests/ion/udiv-by-u32-constant.js
@ -0,0 +1,110 @@
+// Returns an array of `count` pseudo-random values from a fixed-seed
+// shift/xor generator, each stored as an unsigned 32-bit number, so entries
+// may be larger than INT32_MAX.
+function uint_seq(count) {
+    with({}){}
+    var arr = [];
+    var x = 0xfac83126;
+    while (count--) {
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        // >>> 0 reinterprets the Int32 state as an unsigned value.
+        arr.push(x >>> 0);
+    }
+    return arr;
+}
+
+// Calls `int`, `asm` and `ion` on each of the 10000 values from uint_seq and
+// asserts that `asm` and `ion` return the same result as `int`, which acts as
+// the reference. `name` is only used by the commented-out debug log.
+function test(name, asm, ion, int) {
+    with({}){}
+    let count = 10000;
+    let seq = uint_seq(count);
+    for (let x of seq) {
+        let rint = int(x);
+        let rasm = asm(x);
+        let rion = ion(x);
+        // console.log(name, x, rint, rasm, rion);
+        assertEq(rasm, rint);
+        assertEq(rion, rint);
+    }
+}
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 2 and returns the quotient coerced with |0.
+var asmdiv2 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 2)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 2 computation; passed to test()
+// as its `ion` argument.
+var plaindiv2 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 2)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 2 computation; passed to test() as
+// its `int` argument.
+var interpdiv2 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 2)>>>0;
+    return z|0;
+}
+
+test("div2", asmdiv2, plaindiv2, interpdiv2);
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 3 and returns the quotient coerced with |0.
+var asmdiv3 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 3)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 3 computation; passed to test()
+// as its `ion` argument.
+var plaindiv3 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 3)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 3 computation; passed to test() as
+// its `int` argument.
+var interpdiv3 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 3)>>>0;
+    return z|0;
+}
+
+test("div3", asmdiv3, plaindiv3, interpdiv3);
+
+// asm.js module; the exported f(x) divides x, reinterpreted as unsigned with
+// >>>0, by 7 and returns the quotient coerced with |0.
+var asmdiv7 = (function(m) {
+    "use asm"
+    function f(x) {
+        x = x|0;
+        var z = 0;
+        z = ((x>>>0) / 7)>>>0;
+        return z|0;
+    }
+    return f;
+})()
+
+// Plain-function version of the unsigned x / 7 computation; passed to test()
+// as its `ion` argument.
+var plaindiv7 = function(x) {
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 7)>>>0;
+    return z|0;
+}
+
+// Reference version of the unsigned x / 7 computation; passed to test() as
+// its `int` argument.
+var interpdiv7 = function(x) {
+    // NOTE(review): the empty `with` presumably keeps this function out of
+    // the optimizing JIT so it runs in a lower tier -- confirm.
+    with({}){};
+    x = x|0;
+    var z = 0;
+    z = ((x>>>0) / 7)>>>0;
+    return z|0;
+}
+
+test("div7", asmdiv7, plaindiv7, interpdiv7);
--- a/js/src/jit/arm64/CodeGenerator-arm64.cpp
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@ -457,7 +457,230 @@ void CodeGenerator::visitDivI(LDivI* ins) {
 }

 void CodeGenerator::visitDivPowTwoI(LDivPowTwoI* ins) {
-  MOZ_CRASH("CodeGenerator::visitDivPowTwoI");
+  // Emits a 32-bit division of ins->numerator() by a constant of magnitude
+  // (1 << ins->shift()), negated when ins->negativeDivisor() is set. The
+  // division is done with shifts; when the MIR node is not truncated, the
+  // code bails out for results that are not representable as an Int32
+  // (-0, a non-zero remainder, or an overflow).
+  const Register numerator = ToRegister(ins->numerator());
+  const ARMRegister numerator32 = toWRegister(ins->numerator());
+  const ARMRegister output32 = toWRegister(ins->output());
+
+  int32_t shift = ins->shift();
+  bool negativeDivisor = ins->negativeDivisor();
+  MDiv* mir = ins->mir();
+
+  if (!mir->isTruncated() && negativeDivisor) {
+    // 0 divided by a negative number returns a -0 double.
+    bailoutTest32(Assembler::Zero, numerator, numerator, ins->snapshot());
+  }
+
+  // Divisor magnitude of at least 2: the quotient is computed by shifting.
+  if (shift) {
+    if (!mir->isTruncated()) {
+      // If the remainder is != 0, bailout since this must be a double.
+      bailoutTest32(Assembler::NonZero, numerator,
+                    Imm32(UINT32_MAX >> (32 - shift)), ins->snapshot());
+    }
+
+    if (mir->isUnsigned()) {
+      // shift right
+      masm.Lsr(output32, numerator32, shift);
+    } else {
+      // temp32 names the register holding the value to shift; it starts as
+      // the numerator and becomes output32 once an adjustment was written.
+      ARMRegister temp32 = numerator32;
+      // Adjust the value so that shifting produces a correctly
+      // rounded result when the numerator is negative. See 10-1
+      // "Signed Division by a Known Power of 2" in Henry
+      // S. Warren, Jr.'s Hacker's Delight.
+      if (mir->canBeNegativeDividend() && mir->isTruncated()) {
+        if (shift > 1) {
+          // Copy the sign bit of the numerator. (= (2^32 - 1) or 0)
+          masm.Asr(output32, numerator32, 31);
+          temp32 = output32;
+        }
+        // Divide by 2^(32 - shift)
+        // i.e. (= (2^32 - 1) / 2^(32 - shift) or 0)
+        // i.e. (= (2^shift - 1) or 0)
+        masm.Lsr(output32, temp32, 32 - shift);
+        // If signed, make any 1 bit below the shifted bits to bubble up, such
+        // that once shifted the value would be rounded towards 0.
+        masm.Add(output32, output32, numerator32);
+        temp32 = output32;
+      }
+      masm.Asr(output32, temp32, shift);
+
+      if (negativeDivisor) {
+        masm.Neg(output32, output32);
+      }
+    }
+    return;
+  }
+
+  // shift == 0 from here on: the result is the numerator, negated when the
+  // divisor is negative.
+  if (negativeDivisor) {
+    // INT32_MIN / -1 overflows.
+    if (!mir->isTruncated()) {
+      masm.Negs(output32, numerator32);
+      bailoutIf(Assembler::Overflow, ins->snapshot());
+    } else if (mir->trapOnError()) {
+      Label ok;
+      masm.Negs(output32, numerator32);
+      masm.branch(Assembler::NoOverflow, &ok);
+      masm.wasmTrap(wasm::Trap::IntegerOverflow, mir->bytecodeOffset());
+      masm.bind(&ok);
+    } else {
+      // Do not set condition flags.
+      masm.Neg(output32, numerator32);
+    }
+  } else {
+    if (mir->isUnsigned() && !mir->isTruncated()) {
+      // Copy and set flags.
+      masm.Adds(output32, numerator32, 0);
+      // Unsigned division by 1 can overflow if output is not truncated, as we
+      // do not have an Unsigned type for MIR instructions.
+      bailoutIf(Assembler::Signed, ins->snapshot());
+    } else {
+      // Copy the result.
+      masm.Mov(output32, numerator32);
+    }
+  }
+}
+
+// Emits a signed 32-bit division by a constant whose absolute value is not a
+// power of two. The quotient is obtained by multiplying with the reciprocal
+// constants returned by computeDivisionConstants rather than with a divide
+// instruction. When the MIR node is not truncated, the code bails out if the
+// exact result is not an Int32: a non-zero remainder, or 0 divided by a
+// negative constant (which is -0).
+void CodeGenerator::visitDivConstantI(LDivConstantI* ins) {
+  const ARMRegister lhs32 = toWRegister(ins->numerator());
+  const ARMRegister lhs64 = toXRegister(ins->numerator());
+  const ARMRegister const32 = toWRegister(ins->temp());
+  const ARMRegister output32 = toWRegister(ins->output());
+  const ARMRegister output64 = toXRegister(ins->output());
+  int32_t d = ins->denominator();
+
+  // The absolute value of the denominator isn't a power of 2.
+  using mozilla::Abs;
+  MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
+
+  // We will first divide by Abs(d), and negate the answer if d is negative.
+  // If desired, this can be avoided by generalizing computeDivisionConstants.
+  ReciprocalMulConstants rmc =
+      computeDivisionConstants(Abs(d), /* maxLog = */ 31);
+
+  // We first compute (M * n) >> 32, where M = rmc.multiplier.
+  masm.Mov(const32, int32_t(rmc.multiplier));
+  if (rmc.multiplier > INT32_MAX) {
+    MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
+
+    // We actually compute (int32_t(M) * n) instead, without the upper bit.
+    // Thus, (M * n) = (int32_t(M) * n) + n << 32.
+    //
+    // ((int32_t(M) * n) + n << 32) can't overflow, as both operands have
+    // opposite signs because int32_t(M) is negative.
+    masm.Lsl(output64, lhs64, 32);
+
+    // Store (M * n) in output64.
+    masm.Smaddl(output64, const32, lhs32, output64);
+  } else {
+    // Store (M * n) in output64.
+    masm.Smull(output64, const32, lhs32);
+  }
+
+  // (M * n) >> (32 + shift) is the truncated division answer if n is
+  // non-negative, as proved in the comments of computeDivisionConstants. We
+  // must add 1 later if n is negative to get the right answer in all cases.
+  masm.Asr(output64, output64, 32 + rmc.shiftAmount);
+
+  // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
+  // computed with just a sign-extending shift of 31 bits.
+  if (ins->canBeNegativeDividend()) {
+    masm.Asr(const32, lhs32, 31);
+    masm.Sub(output32, output32, const32);
+  }
+
+  // After this, output32 contains the correct truncated division result.
+  if (d < 0) {
+    masm.Neg(output32, output32);
+  }
+
+  if (!ins->mir()->isTruncated()) {
+    // This is a division op. Multiply the obtained value by d to check if
+    // the correct answer is an integer. This cannot overflow, since |d| > 1.
+    masm.Mov(const32, d);
+    masm.Msub(const32, output32, const32, lhs32);
+    // bailout if (lhs - output * d != 0)
+    // The remainder must be compared against zero: Z is then set exactly
+    // when the division was exact.
+    masm.Cmp(const32, wzr);
+    auto bailoutCond = Assembler::NonZero;
+
+    // If lhs is zero and the divisor is negative, the answer should have
+    // been -0.
+    if (d < 0) {
+      // or bailout if (lhs == 0).
+      // ^                  ^
+      // |                  '-- masm.Ccmp(lhs32, wzr, .., ..)
+      // '-- masm.Ccmp(.., .., vixl::ZFlag, ! bailoutCond)
+      //
+      // If the remainder is zero (Zero holds), Z becomes (lhs == 0).
+      // Otherwise the flags are forced to ZFlag. Either way Z is set in
+      // exactly the two bailout cases.
+      masm.Ccmp(lhs32, wzr, vixl::ZFlag, Assembler::Zero);
+      bailoutCond = Assembler::Zero;
+    }
+
+    // bailout if (lhs - output * d != 0) or (d < 0 && lhs == 0)
+    bailoutIf(bailoutCond, ins->snapshot());
+  }
+}
+
+// Emits an unsigned 32-bit division by a constant that is either zero or not
+// a power of two. A zero divisor traps (wasm), yields 0 (other truncated
+// code) or bails out (non-truncated). Otherwise the quotient is obtained by
+// multiplying with the reciprocal constants from computeDivisionConstants.
+// When the MIR node is not truncated, a non-zero remainder causes a bailout.
+void CodeGenerator::visitUDivConstantI(LUDivConstantI* ins) {
+  const ARMRegister lhs32 = toWRegister(ins->numerator());
+  const ARMRegister lhs64 = toXRegister(ins->numerator());
+  const ARMRegister const32 = toWRegister(ins->temp());
+  const ARMRegister output32 = toWRegister(ins->output());
+  const ARMRegister output64 = toXRegister(ins->output());
+  uint32_t d = ins->denominator();
+
+  if (d == 0) {
+    if (ins->mir()->isTruncated()) {
+      if (ins->mir()->trapOnError()) {
+        masm.wasmTrap(wasm::Trap::IntegerDivideByZero,
+                      ins->mir()->bytecodeOffset());
+      } else {
+        masm.Mov(output32, wzr);
+      }
+    } else {
+      bailout(ins->snapshot());
+    }
+    return;
+  }
+
+  // The denominator isn't a power of 2 (see LDivPowTwoI).
+  MOZ_ASSERT((d & (d - 1)) != 0);
+
+  ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32);
+
+  // We first compute (M * n) >> 32, where M = rmc.multiplier.
+  masm.Mov(const32, int32_t(rmc.multiplier));
+  masm.Umull(output64, const32, lhs32);
+  if (rmc.multiplier > UINT32_MAX) {
+    // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
+    // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d,
+    // contradicting the proof of correctness in computeDivisionConstants.
+    MOZ_ASSERT(rmc.shiftAmount > 0);
+    MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
+
+    // We actually compute (uint32_t(M) * n) instead, without the upper bit.
+    // Thus, (M * n) = (uint32_t(M) * n) + n << 32.
+    //
+    // ((uint32_t(M) * n) + n << 32) can overflow. Hacker's Delight explains a
+    // trick to avoid this overflow case, but we can avoid it by computing the
+    // addition on 64 bits registers.
+    //
+    // Compute ((uint32_t(M) * n) >> 32 + n)
+    // NOTE(review): this reads the full 64-bit numerator register and so
+    // assumes its upper 32 bits are zero -- confirm.
+    masm.Add(output64, lhs64, Operand(output64, vixl::LSR, 32));
+
+    // (M * n) >> (32 + shift) is the truncated division answer. The value
+    // is unsigned, so use a logical shift.
+    masm.Lsr(output64, output64, rmc.shiftAmount);
+  } else {
+    // (M * n) >> (32 + shift) is the truncated division answer. The 64-bit
+    // product is unsigned and can have bit 63 set (e.g. d = 3 with
+    // n = UINT32_MAX), so the shift must be logical: an arithmetic shift
+    // would smear the top bit into the low 32 bits of the result.
+    masm.Lsr(output64, output64, 32 + rmc.shiftAmount);
+  }
+
+  // We now have the truncated division value. To check whether the division
+  // was exact, multiply the quotient by d and test the remainder
+  // (lhs - output * d).
+  if (!ins->mir()->isTruncated()) {
+    masm.Mov(const32, d);
+    masm.Msub(const32, output32, const32, lhs32);
+    // bailout if (lhs - output * d != 0)
+    // The remainder must be compared against zero: Z is then set exactly
+    // when the division was exact.
+    masm.Cmp(const32, wzr);
+    bailoutIf(Assembler::NonZero, ins->snapshot());
+  }
 }

 void CodeGeneratorARM64::modICommon(MMod* mir, Register lhs, Register rhs,
--- a/js/src/jit/arm64/LIR-arm64.h
+++ b/js/src/jit/arm64/LIR-arm64.h
@ -82,22 +82,65 @@ class LDivI : public LBinaryMath<1> {

 class LDivPowTwoI : public LInstructionHelper<1, 1, 0> {
+  // LIR node for a 32-bit division by a constant power of two. It has one
+  // operand (the numerator) and stores the shift amount together with a flag
+  // telling whether the divisor is negative.
 const int32_t shift_;
+  const bool negativeDivisor_;

 public:
  LIR_HEADER(DivPowTwoI)

-  LDivPowTwoI(const LAllocation& lhs, int32_t shift)
-      : LInstructionHelper(classOpcode), shift_(shift) {
+  LDivPowTwoI(const LAllocation& lhs, int32_t shift, bool negativeDivisor)
+      : LInstructionHelper(classOpcode),
+        shift_(shift),
+        negativeDivisor_(negativeDivisor) {
    setOperand(0, lhs);
  }

  const LAllocation* numerator() { return getOperand(0); }

  int32_t shift() { return shift_; }
+  bool negativeDivisor() { return negativeDivisor_; }

  MDiv* mir() const { return mir_->toDiv(); }
 };

+// LIR node for a signed 32-bit division by a constant denominator. It has one
+// operand (the numerator) and one temporary, and stores the denominator
+// value.
+class LDivConstantI : public LInstructionHelper<1, 1, 1> {
+  const int32_t denominator_;
+
+ public:
+  LIR_HEADER(DivConstantI)
+
+  LDivConstantI(const LAllocation& lhs, int32_t denominator,
+                const LDefinition& temp)
+      : LInstructionHelper(classOpcode), denominator_(denominator) {
+    setOperand(0, lhs);
+    setTemp(0, temp);
+  }
+
+  const LAllocation* numerator() { return getOperand(0); }
+  const LDefinition* temp() { return getTemp(0); }
+  int32_t denominator() const { return denominator_; }
+  MDiv* mir() const { return mir_->toDiv(); }
+  // Forwards to the MIR node's canBeNegativeDividend().
+  bool canBeNegativeDividend() const { return mir()->canBeNegativeDividend(); }
+};
+
+// LIR node for an unsigned 32-bit division by a constant denominator. It has
+// one operand (the numerator) and one temporary.
+class LUDivConstantI : public LInstructionHelper<1, 1, 1> {
+  // Stored as int32_t; CodeGenerator::visitUDivConstantI assigns the value
+  // returned by denominator() to a uint32_t, i.e. it reinterprets the bits
+  // as unsigned.
+  const int32_t denominator_;
+
+ public:
+  LIR_HEADER(UDivConstantI)
+
+  LUDivConstantI(const LAllocation& lhs, int32_t denominator,
+                 const LDefinition& temp)
+      : LInstructionHelper(classOpcode), denominator_(denominator) {
+    setOperand(0, lhs);
+    setTemp(0, temp);
+  }
+
+  const LAllocation* numerator() { return getOperand(0); }
+  const LDefinition* temp() { return getTemp(0); }
+  int32_t denominator() const { return denominator_; }
+  MDiv* mir() const { return mir_->toDiv(); }
+};
+
 class LModI : public LBinaryMath<1> {
 public:
  LIR_HEADER(ModI);
--- a/js/src/jit/arm64/Lowering-arm64.cpp
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@ -6,6 +6,8 @@

 #include "jit/arm64/Lowering-arm64.h"

+#include "mozilla/MathAlgorithms.h"
+
 #include "jit/arm64/Assembler-arm64.h"
 #include "jit/Lowering.h"
 #include "jit/MIR.h"
@ -203,8 +205,28 @@ void LIRGeneratorARM64::lowerDivI(MDiv* div) {
    return;
  }

-  // TODO (Bug 1523568): Implement the division-avoidance paths when rhs is
-  // constant.
+  // Division-avoidance paths for a constant divisor: a power of two (of
+  // either sign) is lowered to LDivPowTwoI, any other non-zero constant to
+  // LDivConstantI. A zero divisor falls through to the generic LDivI below.
+  if (div->rhs()->isConstant()) {
+    LAllocation lhs = useRegister(div->lhs());
+    int32_t rhs = div->rhs()->toConstant()->toInt32();
+    int32_t shift = mozilla::FloorLog2(mozilla::Abs(rhs));
+
+    // |rhs| is a power of two exactly when it equals 1 << floor(log2(|rhs|)).
+    if (rhs != 0 && uint32_t(1) << shift == mozilla::Abs(rhs)) {
+      LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, rhs < 0);
+      if (div->fallible()) {
+        assignSnapshot(lir, Bailout_DoubleOutput);
+      }
+      define(lir, div);
+      return;
+    }
+    if (rhs != 0) {
+      LDivConstantI* lir = new (alloc()) LDivConstantI(lhs, rhs, temp());
+      if (div->fallible()) {
+        assignSnapshot(lir, Bailout_DoubleOutput);
+      }
+      define(lir, div);
+      return;
+    }
+  }

  LDivI* lir = new (alloc())
      LDivI(useRegister(div->lhs()), useRegister(div->rhs()), temp());
@ -310,8 +332,26 @@ void LIRGenerator::visitWasmNeg(MWasmNeg* ins) {

 void LIRGeneratorARM64::lowerUDiv(MDiv* div) {
  LAllocation lhs = useRegister(div->lhs());
-  // TODO (Bug 1523568): Implement the division-avoidance paths when rhs is
-  // constant.
+  // Division-avoidance paths for a constant divisor: a power of two is
+  // lowered to LDivPowTwoI, every other constant (zero included) to
+  // LUDivConstantI.
+  if (div->rhs()->isConstant()) {
+    // NOTE: the divisor is unsigned, so the Int32 constant is reinterpreted
+    // as uint32_t rather than passed through Abs(). For example -2 denotes
+    // 0xFFFFFFFE here, which is not a power of two and must not be lowered
+    // to a shift by 1.
+    uint32_t rhs = div->rhs()->toConstant()->toInt32();
+    int32_t shift = mozilla::FloorLog2(rhs);
+
+    // rhs is a power of two exactly when it equals 1 << floor(log2(rhs)).
+    if (rhs != 0 && uint32_t(1) << shift == rhs) {
+      LDivPowTwoI* lir = new (alloc()) LDivPowTwoI(lhs, shift, false);
+      if (div->fallible()) {
+        assignSnapshot(lir, Bailout_DoubleOutput);
+      }
+      define(lir, div);
+      return;
+    }
+
+    // LUDivConstantI stores the denominator as int32_t; the code generator
+    // converts it back to uint32_t.
+    LUDivConstantI* lir =
+        new (alloc()) LUDivConstantI(lhs, int32_t(rhs), temp());
+    if (div->fallible()) {
+      assignSnapshot(lir, Bailout_DoubleOutput);
+    }
+    define(lir, div);
+    return;
+  }

  // Generate UDiv
  LAllocation rhs = useRegister(div->rhs());