[QNN] Requantize - Optimize lowering for some corner cases. (#3864)

2019-09-01 20:58:38 -06:00 · 2019-09-01 20:58:38 -06:00 · 1bc8385304
--- a/src/relay/qnn/op/requantize.cc
+++ b/src/relay/qnn/op/requantize.cc
@ -129,6 +129,9 @@ Expr RequantizeLower(const Expr& input_tensor, const RequantizeAttrs* param,
    tensor = Subtract(tensor, input_zp);
  }

+  // If the input and output scales are the same, we can skip the fixed-point multiplication.
+  auto scaled_int64_t = tensor;
+  if (param->input_scale != param->output_scale) {
    // 3) Multiply the integer multiplier
    if (left_shift != 0) {
      tensor = Multiply(tensor, MakeConstantScalar(hp_dtype, 1 << left_shift));
@ -166,11 +169,15 @@ Expr RequantizeLower(const Expr& input_tensor, const RequantizeAttrs* param,
    tensor = Add(tensor, round_scalar);

    // 5) Simply right shift the result to get the final output.
-  auto scaled_int64_t = RightShift(tensor, MakeConstantScalar(hp_dtype, total_right_shift));
+    scaled_int64_t = RightShift(tensor, MakeConstantScalar(hp_dtype, total_right_shift));
+  }

  // 6) Add the output zero point.
+  auto shifted_int64_t = scaled_int64_t;
+  if (param->output_zero_point != 0) {
    auto output_zp = MakeConstantScalar(hp_dtype, param->output_zero_point);
-  auto shifted_int64_t = Add(output_zp, scaled_int64_t);
+    shifted_int64_t = Add(output_zp, scaled_int64_t);
+  }

  // 7) Clip to the out_dtype min/max.
  auto q_min = GetQmin(out_dtype);
--- a/tests/python/relay/test_qnn_requantize.py
+++ b/tests/python/relay/test_qnn_requantize.py
@ -64,6 +64,7 @@ def test_requantize():
                          input_scale=0.5,
                          output_scale=0.5,
                          rounding=rounding)
+            assert 'right_shift' not in mod.astext()
            verify(mod, (golden_data, golden_output))

    def downscale_test():