Improves subpixel reference mv evaluation

Previously, when evaluating reference motion vectors, MVs were always rounded to integer pixel positions and SADs were calculated. This commit takes into account the subpixel portion of the MVs and uses bilinear interpolation to produce reference pixel values at subpixel positions. In addition, SSE is used in place of SAD. The pixels used are 16x2 above and 2x16 to the left. This commit is intended to test the potential of this line of work in terms of compression improvement; obviously, the change would increase decoder complexity significantly. Test results: std-hd: 1.738% (avg), 1.779% (glb), 1.663% (ssim); derf: 0.472% (avg), 0.477% (glb), 0.418% (ssim). Change-Id: I3ae1b098f6289df78891134d9a5e4bb2fde87a0b
2012-10-26 09:14:15 -07:00 · 2012-10-26 09:14:15 -07:00 · cc9c637d07
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@ -168,6 +168,7 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc,
 }

 #if CONFIG_NEWBESTREFMV
+#define SP(x) (((x) & 7) << 1)  /* Low 3 bits of x, doubled; passed as the xoffset/yoffset of the sub-pixel variance calls. NOTE(review): presumably maps a 1/8-pel MV fraction onto a 16-entry bilinear filter table - confirm against vp8_bilinear_filters. */
 unsigned int vp8_sad3x16_c(
  const unsigned char *src_ptr,
  int  src_stride,
@ -189,7 +190,6 @@ unsigned int vp8_sad16x3_c(
 * above and a number cols of pixels in the left to select the one with best
 * score to use as ref motion vector
 */
-
 void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
                           unsigned char *ref_y_buffer,
                           int ref_y_stride,
@ -203,6 +203,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
  unsigned char *above_ref;
  unsigned char *left_ref;
  int sad;
+  int sse;
  int sad_scores[MAX_MV_REFS] = {0};
  int_mv sorted_mvs[MAX_MV_REFS];
  int zero_seen = FALSE;
@ -211,16 +212,16 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
  best_mv->as_int = nearest->as_int = near->as_int = 0;
  vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs));

-  above_src = xd->dst.y_buffer - xd->dst.y_stride * 3;
-  left_src  = xd->dst.y_buffer - 3;
-  above_ref = ref_y_buffer - ref_y_stride * 3;
-  left_ref  = ref_y_buffer - 3;
+  above_src = xd->dst.y_buffer - xd->dst.y_stride * 2;
+  left_src  = xd->dst.y_buffer - 2;
+  above_ref = ref_y_buffer - ref_y_stride * 2;
+  left_ref  = ref_y_buffer - 2;

  //for(i = 0; i < MAX_MV_REFS; ++i) {
  // Limit search to the predicted best 4
  for(i = 0; i < 4; ++i) {
    int_mv this_mv;
-    int offset=0;
+    int offset = 0;
    int row_offset, col_offset;

    this_mv.as_int = mvlist[i].as_int;
@ -238,19 +239,23 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
                 xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16,
                 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);

-    row_offset = (this_mv.as_mv.row > 0) ?
-      ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3);
-    col_offset = (this_mv.as_mv.col > 0) ?
-      ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3);
+    row_offset = this_mv.as_mv.row >> 3;
+    col_offset = this_mv.as_mv.col >> 3;
    offset = ref_y_stride * row_offset + col_offset;

    sad = 0;
-    if (xd->up_available)
-      sad += vp8_sad16x3(above_src, xd->dst.y_stride,
-                           above_ref + offset, ref_y_stride, INT_MAX);
-    if (xd->left_available)
-      sad += vp8_sad3x16(left_src, xd->dst.y_stride,
-                           left_ref + offset, ref_y_stride, INT_MAX);
+    if (xd->up_available) {
+      vp8_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride,
+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                                   above_src, xd->dst.y_stride, &sse);
+      sad += sse;
+    }
+    if (xd->left_available) {
+      vp8_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride,
+                                   SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
+                                   left_src, xd->dst.y_stride, &sse);
+      sad += sse;
+    }
    // Add the entry to our list and then resort the list on score.
    sad_scores[i] = sad;
    sorted_mvs[i].as_int = this_mv.as_int;
@ -280,7 +285,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd,
  // be more than one 0,0 entry in the sorted list.
  // The best ref mv is always set to the first entry (which gave the best
  // results. The nearest is set to the first non zero vector if available and
-  // near to the second non zero vector if avaialable.
+  // near to the second non zero vector if available.
  // We do not use 0,0 as a nearest or near as 0,0 has its own mode.
  if ( sorted_mvs[0].as_int ) {
    nearest->as_int = sorted_mvs[0].as_int;
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@ -508,3 +508,80 @@ unsigned int vp8_sub_pixel_variance8x16_c

  return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
+#if CONFIG_NEWBESTREFMV
+// Variance-style score for a 2-wide by 16-tall block.
+//
+// Forwards both blocks to variance() with dimensions (2, 16), stores its
+// first output in *sse, and returns that value minus (second output squared,
+// shifted right by 5). NOTE(review): the two outputs are presumably the sum
+// of squared differences and the sum of differences - confirm against
+// variance().
+unsigned int vp8_variance2x16_c(
+  const unsigned char *src_ptr,
+  const int  source_stride,
+  const unsigned char *ref_ptr,
+  const int  recon_stride,
+  unsigned int *sse) {
+  unsigned int block_sse;
+  int block_sum;
+  int mean_term;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16,
+           &block_sse, &block_sum);
+  *sse = block_sse;
+
+  // 2 * 16 = 32 samples, hence the shift by 5.
+  mean_term = (block_sum * block_sum) >> 5;
+  return block_sse - mean_term;
+}
+
+// Variance-style score for a 16-wide by 2-tall block.
+//
+// Forwards both blocks to variance() with dimensions (16, 2), stores its
+// first output in *sse, and returns that value minus (second output squared,
+// shifted right by 5). NOTE(review): the two outputs are presumably the sum
+// of squared differences and the sum of differences - confirm against
+// variance().
+unsigned int vp8_variance16x2_c(
+  const unsigned char *src_ptr,
+  const int  source_stride,
+  const unsigned char *ref_ptr,
+  const int  recon_stride,
+  unsigned int *sse) {
+  unsigned int block_sse;
+  int block_sum;
+  int mean_term;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2,
+           &block_sse, &block_sum);
+  *sse = block_sse;
+
+  // 16 * 2 = 32 samples, hence the shift by 5.
+  mean_term = (block_sum * block_sum) >> 5;
+  return block_sse - mean_term;
+}
+
+// Sub-pixel variance of a 16-wide by 2-tall block.
+//
+// src_ptr / src_pixels_per_line: block that is bilinearly filtered, and its
+//   stride.
+// xoffset / yoffset: indices into vp8_bilinear_filters selecting the
+//   horizontal and vertical filter; they are not range-checked here.
+// dst_ptr / dst_pixels_per_line: block the filtered result is compared
+//   against, and its stride.
+// sse: output, written by vp8_variance16x2_c.
+// Returns whatever vp8_variance16x2_c returns for the filtered block.
+unsigned int vp8_sub_pixel_variance16x2_c
+(
+  const unsigned char  *src_ptr,
+  const int  src_pixels_per_line,
+  const int  xoffset,
+  const int  yoffset,
+  const unsigned char *dst_ptr,
+  const int dst_pixels_per_line,
+  unsigned int *sse
+) {
+  // First-pass output: 3 rows of 16, one row more than the final height.
+  unsigned short FData3[16 * 3];
+  // Second-pass output: 2 rows of 16, read back below with stride 16. Sized
+  // exactly, as in the 2x16 variant, instead of the former 20 * 16.
+  unsigned char  temp2[16 * 2];
+  const short *HFilter, *VFilter;
+
+  HFilter = vp8_bilinear_filters[xoffset];
+  VFilter = vp8_bilinear_filters[yoffset];
+
+  var_filter_block2d_bil_first_pass(src_ptr, FData3,
+                                    src_pixels_per_line, 1, 3, 16, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter);
+
+  return vp8_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
+}
+
+// Sub-pixel variance of a 2-wide by 16-tall block.
+//
+// Filters the source block in two passes using the vp8_bilinear_filters
+// entries selected by xoffset (first pass) and yoffset (second pass), then
+// hands the filtered 2x16 block and dst_ptr to vp8_variance2x16_c, which
+// writes *sse and supplies the return value.
+unsigned int vp8_sub_pixel_variance2x16_c
+(
+  const unsigned char  *src_ptr,
+  const int  src_pixels_per_line,
+  const int  xoffset,
+  const int  yoffset,
+  const unsigned char *dst_ptr,
+  const int dst_pixels_per_line,
+  unsigned int *sse
+) {
+  const short *horiz_filter = vp8_bilinear_filters[xoffset];
+  const short *vert_filter = vp8_bilinear_filters[yoffset];
+  // Temp data buffer for the first pass: 17 rows of 2.
+  unsigned short first_pass[2 * 17];
+  // Second-pass output: 16 rows of 2.
+  unsigned char  filtered[2 * 16];
+
+  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
+                                    1, 17, 2, horiz_filter);
+  var_filter_block2d_bil_second_pass(first_pass, filtered,
+                                     2, 2, 16, 2, vert_filter);
+
+  return vp8_variance2x16_c(filtered, 2, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif