Compute compound average in warp_plane only for COMPOUND_AVERAGE

This fixes a mismatch which occurs when global/warped motion and a masked compound type are used together. Change-Id: I08b2702cdb3b85f8d8817b9286a73951c97cf379
2017-05-04 15:33:10 -07:00 · 2017-05-04 15:33:10 -07:00 · 7afb8b758a
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@ -608,11 +608,11 @@ if (aom_config("CONFIG_PVQ") eq "yes") {

 if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") ||
    (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) {
-  add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+  add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
  specialize qw/av1_warp_affine sse2 ssse3/;

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
-    add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+    add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
    specialize qw/av1_highbd_warp_affine ssse3/;
  }
 }
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@ -415,13 +415,19 @@ static INLINE void av1_make_inter_predictor(
  if (do_warp) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
+#if CONFIG_EXT_INTER
+    int compute_avg =
+        ref && mi->mbmi.interinter_compound_type == COMPOUND_AVERAGE;
+#else
+    int compute_avg = ref;
+#endif  // CONFIG_EXT_INTER
    av1_warp_plane(&final_warp_params,
 #if CONFIG_HIGHBITDEPTH
                   xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
 #endif  // CONFIG_HIGHBITDEPTH
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
-                   pd->subsampling_x, pd->subsampling_y, xs, ys, ref);
+                   pd->subsampling_x, pd->subsampling_y, xs, ys, compute_avg);
    return;
  }
 #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@ -909,7 +909,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
                                  int p_width, int p_height, int p_stride,
                                  int subsampling_x, int subsampling_y,
                                  int x_scale, int y_scale, int bd,
-                                  int ref_frm) {
+                                  int comp_avg) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
@ -923,7 +923,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8,
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
-      if (ref_frm)
+      if (comp_avg)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
@ -953,7 +953,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
                              int width, int height, int stride, uint16_t *pred,
                              int p_col, int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
-                              int subsampling_y, int bd, int ref_frm,
+                              int subsampling_y, int bd, int comp_avg,
                              int16_t alpha, int16_t beta, int16_t gamma,
                              int16_t delta) {
 #if HORSHEAR_REDUCE_PREC_BITS >= 5
@ -1059,7 +1059,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
          }
          sum = clip_pixel_highbd(
              ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd);
-          if (ref_frm)
+          if (comp_avg)
            *p = ROUND_POWER_OF_TWO(*p + sum, 1);
          else
            *p = sum;
@ -1075,7 +1075,7 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
-                              int bd, int ref_frm) {
+                              int bd, int comp_avg) {
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
@ -1092,12 +1092,12 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
    uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                           p_width, p_height, p_stride, subsampling_x,
-                           subsampling_y, bd, ref_frm, alpha, beta, gamma,
+                           subsampling_y, bd, comp_avg, alpha, beta, gamma,
                           delta);
  } else {
    highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row,
                          p_width, p_height, p_stride, subsampling_x,
-                          subsampling_y, x_scale, y_scale, bd, ref_frm);
+                          subsampling_y, x_scale, y_scale, bd, comp_avg);
  }
 }

@ -1138,7 +1138,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
                           int height, int stride, uint8_t *pred, int p_col,
                           int p_row, int p_width, int p_height, int p_stride,
                           int subsampling_x, int subsampling_y, int x_scale,
-                           int y_scale, int ref_frm) {
+                           int y_scale, int comp_avg) {
  int i, j;
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
  if (projectpoints == NULL) return;
@ -1150,7 +1150,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
-      if (ref_frm)
+      if (comp_avg)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
@ -1202,7 +1202,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width,
 void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
-                       int subsampling_x, int subsampling_y, int ref_frm,
+                       int subsampling_x, int subsampling_y, int comp_avg,
                       int16_t alpha, int16_t beta, int16_t gamma,
                       int16_t delta) {
  int16_t tmp[15 * 8];
@ -1316,7 +1316,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
            sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
          }
          sum = clip_pixel(ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS));
-          if (ref_frm)
+          if (comp_avg)
            *p = ROUND_POWER_OF_TWO(*p + sum, 1);
          else
            *p = sum;
@ -1331,7 +1331,7 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y, int x_scale,
-                       int y_scale, int ref_frm) {
+                       int y_scale, int comp_avg) {
  if (wm->wmtype == ROTZOOM) {
    wm->wmmat[5] = wm->wmmat[2];
    wm->wmmat[4] = -wm->wmmat[3];
@ -1346,11 +1346,11 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,

    av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                    p_width, p_height, p_stride, subsampling_x, subsampling_y,
-                    ref_frm, alpha, beta, gamma, delta);
+                    comp_avg, alpha, beta, gamma, delta);
  } else {
    warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
                   p_height, p_stride, subsampling_x, subsampling_y, x_scale,
-                   y_scale, ref_frm);
+                   y_scale, comp_avg);
  }
 }

@ -1409,17 +1409,17 @@ void av1_warp_plane(WarpedMotionParams *wm,
                    uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
-                    int subsampling_y, int x_scale, int y_scale, int ref_frm) {
+                    int subsampling_y, int x_scale, int y_scale, int comp_avg) {
 #if CONFIG_HIGHBITDEPTH
  if (use_hbd)
    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
-                      x_scale, y_scale, bd, ref_frm);
+                      x_scale, y_scale, bd, comp_avg);
  else
 #endif  // CONFIG_HIGHBITDEPTH
    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
               p_height, p_stride, subsampling_x, subsampling_y, x_scale,
-               y_scale, ref_frm);
+               y_scale, comp_avg);
 }

 #if CONFIG_WARPED_MOTION
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@ -87,7 +87,7 @@ void av1_warp_plane(WarpedMotionParams *wm,
                    uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
-                    int subsampling_y, int x_scale, int y_scale, int ref_frm);
+                    int subsampling_y, int x_scale, int y_scale, int comp_avg);

 int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy,
                    int mvx, WarpedMotionParams *wm_params, int mi_row,
--- a/av1/common/x86/highbd_warp_plane_ssse3.c
+++ b/av1/common/x86/highbd_warp_plane_ssse3.c
@ -20,7 +20,7 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
                                  uint16_t *pred, int p_col, int p_row,
                                  int p_width, int p_height, int p_stride,
                                  int subsampling_x, int subsampling_y, int bd,
-                                  int ref_frm, int16_t alpha, int16_t beta,
+                                  int comp_avg, int16_t alpha, int16_t beta,
                                  int16_t gamma, int16_t delta) {
 #if HORSHEAR_REDUCE_PREC_BITS >= 5
  __m128i tmp[15];
@ -304,10 +304,12 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
        // to only output 4 pixels at this point, to avoid encode/decode
        // mismatches when encoding with multiple threads.
        if (p_width == 4) {
-          if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
+          if (comp_avg)
+            res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p));
          _mm_storel_epi64(p, res_16bit);
        } else {
-          if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
+          if (comp_avg)
+            res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
          _mm_storeu_si128(p, res_16bit);
        }
      }
--- a/av1/common/x86/warp_plane_sse2.c
+++ b/av1/common/x86/warp_plane_sse2.c
@ -18,7 +18,7 @@
 void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
                          int height, int stride, uint8_t *pred, int p_col,
                          int p_row, int p_width, int p_height, int p_stride,
-                          int subsampling_x, int subsampling_y, int ref_frm,
+                          int subsampling_x, int subsampling_y, int comp_avg,
                          int16_t alpha, int16_t beta, int16_t gamma,
                          int16_t delta) {
  __m128i tmp[15];
@ -296,13 +296,13 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
        // to only output 4 pixels at this point, to avoid encode/decode
        // mismatches when encoding with multiple threads.
        if (p_width == 4) {
-          if (ref_frm) {
+          if (comp_avg) {
            const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
            res_8bit = _mm_avg_epu8(res_8bit, orig);
          }
          *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
        } else {
-          if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
+          if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
          _mm_storel_epi64(p, res_8bit);
        }
      }
--- a/av1/common/x86/warp_plane_ssse3.c
+++ b/av1/common/x86/warp_plane_ssse3.c
@ -205,7 +205,7 @@ static const uint8_t odd_mask[16] = { 1, 3,  3,  5,  5,  7,  7,  9,
 void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
                           int height, int stride, uint8_t *pred, int p_col,
                           int p_row, int p_width, int p_height, int p_stride,
-                           int subsampling_x, int subsampling_y, int ref_frm,
+                           int subsampling_x, int subsampling_y, int comp_avg,
                           int16_t alpha, int16_t beta, int16_t gamma,
                           int16_t delta) {
  __m128i tmp[15];
@ -473,13 +473,13 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
        // to only output 4 pixels at this point, to avoid encode/decode
        // mismatches when encoding with multiple threads.
        if (p_width == 4) {
-          if (ref_frm) {
+          if (comp_avg) {
            const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p);
            res_8bit = _mm_avg_epu8(res_8bit, orig);
          }
          *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
        } else {
-          if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
+          if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
          _mm_storel_epi64(p, res_8bit);
        }
      }