Integrate global_motion with convolve_round in hbd

Change-Id: I46d77055765c0edcb615f1bb851fea10f6fc84ba
2017-06-20 10:44:03 -07:00 · 2017-06-20 10:44:03 -07:00 · a1d0c4d53c
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@ -615,6 +615,9 @@ if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
    specialize qw/av1_highbd_warp_affine ssse3/;
+    if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
+      add_proto qw/void av1_highbd_warp_affine_post_round/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+    }
  }
 }

--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@ -1030,6 +1030,112 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
  }
 }

+#if CONFIG_CONVOLVE_ROUND
+void av1_highbd_warp_affine_post_round_c(
+    const int32_t *mat, const uint16_t *ref, int width, int height, int stride,
+    uint16_t *pred, int p_col, int p_row, int p_width, int p_height,
+    int p_stride, int subsampling_x, int subsampling_y, int bd,
+    ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma,
+    int16_t delta) {
+  (void)pred;
+  (void)p_stride;
+  int32_t tmp[15 * 8];
+  int i, j, k, l, m;
+  const int offset_bits_horiz = bd + FILTER_BITS - 1;
+  const int offset_bits_vert = bd + 2 * FILTER_BITS - conv_params->round_0;
+  assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
+
+  for (i = p_row; i < p_row + p_height; i += 8) {
+    for (j = p_col; j < p_col + p_width; j += 8) {
+      int32_t x4, y4, ix4, sx4, iy4, sy4;
+      if (subsampling_x)
+        x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
+              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
+             4;
+      else
+        x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
+
+      if (subsampling_y)
+        y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
+              (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
+             4;
+      else
+        y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
+
+      ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+      sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+      iy4 = y4 >> WARPEDMODEL_PREC_BITS;
+      sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
+      sy4 &= ~((1 << WARP_PARAM_REDUCE_BITS) - 1);
+
+      // Horizontal filter
+      for (k = -7; k < 8; ++k) {
+        int iy = iy4 + k;
+        if (iy < 0)
+          iy = 0;
+        else if (iy > height - 1)
+          iy = height - 1;
+
+        int sx = sx4 + beta * (k + 4);
+        for (l = -4; l < 4; ++l) {
+          int ix = ix4 + l - 3;
+          const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
+                           WARPEDPIXEL_PREC_SHIFTS;
+          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
+          const int16_t *coeffs = warped_filter[offs];
+
+          int32_t sum = 1 << offset_bits_horiz;
+          for (m = 0; m < 8; ++m) {
+            int sample_x = ix + m;
+            if (sample_x < 0)
+              sample_x = 0;
+            else if (sample_x > width - 1)
+              sample_x = width - 1;
+            sum += ref[iy * stride + sample_x] * coeffs[m];
+          }
+          sum = ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+          assert(0 <= sum &&
+                 sum < (1 << (bd + FILTER_BITS + 1 - conv_params->round_0)));
+          tmp[(k + 7) * 8 + (l + 4)] = sum;
+          sx += alpha;
+        }
+      }
+
+      // Vertical filter
+      for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
+        int sy = sy4 + delta * (k + 4);
+        for (l = -4; l < 4; ++l) {
+          const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
+                           WARPEDPIXEL_PREC_SHIFTS;
+          assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
+          const int16_t *coeffs = warped_filter[offs];
+
+          int32_t sum = 1 << offset_bits_vert;
+          for (m = 0; m < 8; ++m) {
+            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
+          }
+
+          sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) -
+                (1 << (offset_bits_horiz + FILTER_BITS - conv_params->round_0 -
+                       conv_params->round_1)) -
+                (1 << (offset_bits_vert - conv_params->round_1));
+          CONV_BUF_TYPE *p =
+              &conv_params->dst[(i - p_row + k + 4) * conv_params->dst_stride +
+                                (j - p_col + l + 4)];
+          *p += sum;
+          sy += gamma;
+        }
+      }
+    }
+  }
+}
+#endif
+
 static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,
                              int width, int height, int stride,
                              const uint8_t *const pred8, int p_col, int p_row,
@ -1051,10 +1157,25 @@ static void highbd_warp_plane(WarpedMotionParams *wm, const uint8_t *const ref8,

    const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+#if CONFIG_CONVOLVE_ROUND
+    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
+      conv_params->do_post_rounding = 1;
+      av1_highbd_warp_affine_post_round(
+          mat, ref, width, height, stride, pred, p_col, p_row, p_width,
+          p_height, p_stride, subsampling_x, subsampling_y, bd, conv_params,
+          alpha, beta, gamma, delta);
+    } else {
+      av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col,
+                             p_row, p_width, p_height, p_stride, subsampling_x,
+                             subsampling_y, bd, conv_params, alpha, beta, gamma,
+                             delta);
+    }
+#else
    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                           p_width, p_height, p_stride, subsampling_x,
                           subsampling_y, bd, conv_params, alpha, beta, gamma,
                           delta);
+#endif
  } else {
    highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
                          p_width, p_height, p_stride, subsampling_x,