This patch deletes the variance-based speed-3 partitioning.
Speed 3 now uses the same partitioning method as speed 2,
but with some stricter conditions.

Speed and quality now fall between speeds 2 and 4,
whereas before speed 3 was worse than speed 4 on both counts.

Change-Id: Ia142e7007299d79db3ceee6ca8670540db6f7a41
Paul Wilkins 2013-09-19 18:20:18 +01:00
Parent 8e45778eaf
Commit a76caa7ff4
3 changed files with 14 additions and 327 deletions

View file

@@ -951,323 +951,6 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
}
}
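// Note: set_block_size() records a chosen partition by pointing every
// in-picture 8x8 mode-info entry covered by the block at a single
// MODE_INFO and stamping its sb_type with the chosen block size.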
static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8,
BLOCK_SIZE bsize, int mis, int mi_row,
int mi_col) {
int r, c;
const int bs = MAX(num_8x8_blocks_wide_lookup[bsize],
num_8x8_blocks_high_lookup[bsize]);
const int idx_str = mis * mi_row + mi_col;
MODE_INFO **const mi2 = &mi_8x8[idx_str];
mi2[0] = cm->mi + idx_str;
mi2[0]->mbmi.sb_type = bsize;
for (r = 0; r < bs; r++)
for (c = 0; c < bs; c++)
if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols)
mi2[r * mis + c] = mi2[0];
}
typedef struct {
int64_t sum_square_error;
int64_t sum_error;
int count;
int variance;
} var;
typedef struct {
var none;
var horz[2];
var vert[2];
} partition_variance;
#define VT(TYPE, BLOCKSIZE) \
typedef struct { \
partition_variance vt; \
BLOCKSIZE split[4]; } TYPE;
VT(v8x8, var)
VT(v16x16, v8x8)
VT(v32x32, v16x16)
VT(v64x64, v32x32)
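// For illustration, VT(v16x16, v8x8) expands to
//   typedef struct { partition_variance vt; v8x8 split[4]; } v16x16;
// so a single v64x64 instance carries the whole 8x8-to-64x64 variance
// pyramid for one superblock.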
typedef struct {
partition_variance *vt;
var *split[4];
} vt_node;
typedef enum {
V16X16,
V32X32,
V64X64,
} TREE_LEVEL;
static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) {
int i;
switch (bsize) {
case BLOCK_64X64: {
v64x64 *vt = (v64x64 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_32X32: {
v32x32 *vt = (v32x32 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_16X16: {
v16x16 *vt = (v16x16 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_8X8: {
v8x8 *vt = (v8x8 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i];
break;
}
default:
node->vt = 0;
for (i = 0; i < 4; i++)
node->split[i] = 0;
assert(0);  // unreachable: unhandled block size
}
}
// Set variance values given sum square error, sum error, count.
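// The fixed-point result is 256 * (s2 - s * s / c) / c, i.e. the usual
// E[x^2] - E[x]^2 scaled by 256 to preserve integer precision.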
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
v->sum_square_error = s2;
v->sum_error = s;
v->count = c;
if (c > 0)
v->variance = (int)(256
* (v->sum_square_error - v->sum_error * v->sum_error / v->count)
/ v->count);
else
v->variance = 0;
}
// Combine 2 variance structures by summing the sum_error, sum_square_error,
// and counts and then calculating the new variance.
void sum_2_variances(var *r, var *a, var *b) {
fill_variance(r, a->sum_square_error + b->sum_square_error,
a->sum_error + b->sum_error, a->count + b->count);
}
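// The split nodes are in raster order: split[0] is the top-left
// quadrant, split[1] top-right, split[2] bottom-left and split[3]
// bottom-right, so horz[] accumulates the top/bottom halves and
// vert[] the left/right halves.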
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
vt_node node;
tree_to_node(data, bsize, &node);
sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
}
#if PERFORM_RANDOM_PARTITIONING
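// Debug variant: each split type is accepted pseudo-randomly;
// (rand() & 3) < 1 fires with probability 1/4, which exercises the
// partition plumbing independently of the measured variances.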
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
BLOCK_SIZE block_size, int mi_row,
int mi_col, int mi_size) {
VP9_COMMON * const cm = &cpi->common;
vt_node vt;
const int mis = cm->mode_info_stride;
int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
tree_to_node(data, block_size, &vt);
// split none is available only if we have more than half a block size
// in width and height inside the visible image
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
(rand() & 3) < 1) {
set_block_size(cm, m, block_size, mis, mi_row, mi_col);
return 1;
}
// vertical split is available on all but the bottom border
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
&& (rand() & 3) < 1) {
set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
mi_col);
return 1;
}
// horizontal split is available on all but the right border
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
&& (rand() & 3) < 1) {
set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
mi_col);
return 1;
}
return 0;
}
#else // !PERFORM_RANDOM_PARTITIONING
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
BLOCK_SIZE bsize, int mi_row,
int mi_col, int mi_size) {
VP9_COMMON * const cm = &cpi->common;
vt_node vt;
const int mis = cm->mode_info_stride;
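// The cutoff scales with the quantizer: coarser quantization hides
// more residual variance inside a single large block, so bigger
// partitions are accepted more readily at high base_qindex.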
int64_t threshold = 50 * cpi->common.base_qindex;
tree_to_node(data, bsize, &vt);
// split none is available only if we have more than half a block size
// in width and height inside the visible image
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
&& vt.vt->none.variance < threshold) {
set_block_size(cm, m, bsize, mis, mi_row, mi_col);
return 1;
}
// vertical split is available on all but the bottom border
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
&& vt.vt->vert[1].variance < threshold) {
set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row,
mi_col);
return 1;
}
// horizontal split is available on all but the right border
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
&& vt.vt->horz[1].variance < threshold) {
set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row,
mi_col);
return 1;
}
return 0;
}
#endif // PERFORM_RANDOM_PARTITIONING
static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK *x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
// TODO(JBB): More experimentation or testing of this threshold.
int64_t threshold = 4;
int i, j, k;
v64x64 vt;
unsigned char * s;
int sp;
const unsigned char * d;
int dp;
int pixels_wide = 64, pixels_high = 64;
vp9_zero(vt);
set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
pixels_high += (xd->mb_to_bottom_edge >> 3);
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
// TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
// but this needs more experimentation.
threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
d = vp9_64x64_zeros;
dp = 64;
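// Measured against vp9_64x64_zeros, the "variance" of each 8x8 block
// is simply its source energy; for inter frames the reference is
// replaced below with a LAST_FRAME prediction so the tree measures
// residual variance instead.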
if (cm->frame_type != KEY_FRAME) {
int_mv nearest_mv, near_mv;
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
&xd->scale_factor[0]);
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME;
xd->this_mi->mbmi.sb_type = BLOCK_64X64;
vp9_find_best_ref_mvs(xd,
mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]],
&nearest_mv, &near_mv);
xd->this_mi->mbmi.mv[0] = nearest_mv;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
}
// Fill in the entire tree of 8x8 variances for splits.
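// (i & 1) selects the left/right half and (i >> 1) the top/bottom
// half, giving pixel offsets of 0 or 32 within the 64x64 superblock;
// the same pattern recurses with shifts of 4 and 3 at the 16x16 and
// 8x8 levels.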
for (i = 0; i < 4; i++) {
const int x32_idx = ((i & 1) << 5);
const int y32_idx = ((i >> 1) << 5);
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
v16x16 *vst = &vt.split[i].split[j];
for (k = 0; k < 4; k++) {
int x_idx = x16_idx + ((k & 1) << 3);
int y_idx = y16_idx + ((k >> 1) << 3);
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(&vst->split[k].vt.none, sse, sum, 64);
}
}
}
// Fill the rest of the variance tree by summing the split partition
// values.
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
}
fill_variance_tree(&vt, BLOCK_64X64);
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
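// The mi_size arguments (4, 2, 1) are half of each block's width in
// 8x8 mode-info units, matching the "more than half a block size
// inside the visible image" test in set_vt_partitioning().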
if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col,
4)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32,
(mi_row + y32_idx), (mi_col + x32_idx), 2)) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8,
BLOCK_16X16,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx), 1)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
set_block_size(cm, mi_8x8, BLOCK_8X8, mis,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx));
}
}
}
}
}
}
}
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
@@ -2060,7 +1743,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
if (cpi->sf.reference_masking)
rd_pick_reference_frame(cpi, mi_row, mi_col);
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
if (cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
@@ -2072,10 +1755,6 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
set_partitioning(cpi, mi_8x8, mi_row, mi_col);
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else if (cpi->sf.partition_by_variance) {
choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col);
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else {
if ((cpi->common.current_video_frame
% cpi->sf.last_partitioning_redo_frequency) == 0

View file

@@ -703,7 +703,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_motion_search = 0;
sf->use_avoid_tested_higherror = 0;
sf->reference_masking = 0;
sf->partition_by_variance = 0;
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
@@ -826,8 +825,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_skip_start = 6;
}
if (speed == 3) {
sf->less_rectangular_check = 1;
sf->use_square_partition_only = 1;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->partition_by_variance = 1;
sf->use_lastframe_partitioning = 1;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
cpi->common.intra_only ||
cpi->common.show_frame == 0) ?
@@ -839,17 +842,23 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_COMP_REFMISMATCH |
FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
sf->intra_y_mode_mask = INTRA_DC_ONLY;
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
sf->adaptive_motion_search = 1;
sf->using_small_partition_info = 0;
sf->disable_splitmv = 1;
sf->auto_mv_step_size = 1;
sf->search_method = BIGDIA;
sf->subpel_iters_per_step = 1;
sf->use_fast_lpf_pick = 1;
sf->auto_min_max_partition_size = 1;
sf->auto_min_max_partition_interval = 2;
sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 64;
sf->intra_y_mode_mask = INTRA_DC_ONLY;
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
sf->use_fast_coef_updates = 2;
sf->mode_skip_start = 6;
}
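For context, these per-speed presets are selected at run time through the encoder's cpu-used control. A minimal sketch against the public libvpx API (hypothetical helper name; error handling omitted; assumes an already-initialized encoder context):

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Request the speed-3 preset: VP8E_SET_CPUUSED is the control that
 * feeds the speed level consumed by vp9_set_speed_features(). */
static vpx_codec_err_t use_speed3(vpx_codec_ctx_t *codec) {
  return vpx_codec_control(codec, VP8E_SET_CPUUSED, 3);
}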

View file

@@ -250,7 +250,6 @@ typedef struct {
TX_SIZE_SEARCH_METHOD tx_size_search_method;
int use_lp32x32fdct;
int use_avoid_tested_higherror;
int partition_by_variance;
int use_one_partition_size_always;
int less_rectangular_check;
int use_square_partition_only;