Merge "Account context based prob model for motion vector cost estimate" into nextgenv2

2016-02-22 17:37:41 +00:00 · 2016-02-22 17:37:41 +00:00 · a10814e11e
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@ -107,9 +107,18 @@ struct macroblock {
  unsigned int pred_sse[MAX_REF_FRAMES];
  int pred_mv_sad[MAX_REF_FRAMES];

+#if CONFIG_REF_MV
+  int *nmvjointcost;
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int *nmvcost[NMV_CONTEXTS][2];
+  int *nmvcost_hp[NMV_CONTEXTS][2];
+  int **mv_cost_stack[NMV_CONTEXTS];
+#else
  int nmvjointcost[MV_JOINTS];
  int *nmvcost[2];
  int *nmvcost_hp[2];
+#endif
+
  int **mvcost;

  int nmvjointsadcost[MV_JOINTS];
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@ -222,6 +222,22 @@ int vp10_get_active_map(VP10_COMP* cpi,
 void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv) {
  MACROBLOCK *const mb = &cpi->td.mb;
  cpi->common.allow_high_precision_mv = allow_high_precision_mv;
+
+#if CONFIG_REF_MV
+  {
+    // CONFIG_REF_MV keeps one pair of MV cost tables per NMV context.
+    // Point every context's mv_cost_stack entry at the pair that matches
+    // the precision selected above (nmvcost_hp when high precision is
+    // allowed, nmvcost otherwise). mvsadcost has no per-context dimension,
+    // so it is assigned once here rather than on every loop iteration.
+    const int use_hp = cpi->common.allow_high_precision_mv;
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) {
+      mb->mv_cost_stack[i] = use_hp ? mb->nmvcost_hp[i] : mb->nmvcost[i];
+    }
+    mb->mvsadcost = use_hp ? mb->nmvsadcost_hp : mb->nmvsadcost;
+  }
+#else
  if (cpi->common.allow_high_precision_mv) {
    mb->mvcost = mb->nmvcost_hp;
    mb->mvsadcost = mb->nmvsadcost_hp;
@ -229,6 +245,7 @@ void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv) {
    mb->mvcost = mb->nmvcost;
    mb->mvsadcost = mb->nmvsadcost;
  }
+#endif
 }

 static void setup_frame(VP10_COMP *cpi) {
@ -338,6 +355,9 @@ void vp10_initialize_enc(void) {

 static void dealloc_compressor_data(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif

  vpx_free(cpi->mbmi_ext_base);
  cpi->mbmi_ext_base = NULL;
@ -351,6 +371,19 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
  vpx_free(cpi->coding_context.last_frame_seg_map_copy);
  cpi->coding_context.last_frame_seg_map_copy = NULL;

+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vpx_free(cpi->nmv_costs[i][0]);
+    vpx_free(cpi->nmv_costs[i][1]);
+    vpx_free(cpi->nmv_costs_hp[i][0]);
+    vpx_free(cpi->nmv_costs_hp[i][1]);
+    cpi->nmv_costs[i][0] = NULL;
+    cpi->nmv_costs[i][1] = NULL;
+    cpi->nmv_costs_hp[i][0] = NULL;
+    cpi->nmv_costs_hp[i][1] = NULL;
+  }
+#endif
+
  vpx_free(cpi->nmvcosts[0]);
  vpx_free(cpi->nmvcosts[1]);
  cpi->nmvcosts[0] = NULL;
@ -412,12 +445,29 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
 static void save_coding_context(VP10_COMP *cpi) {
  CODING_CONTEXT *const cc = &cpi->coding_context;
  VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif

  // Stores a snapshot of key state variables which can subsequently be
  // restored with a call to vp10_restore_coding_context. These functions are
  // intended for use in a re-code loop in vp10_compress_frame where the
  // quantizer value is adjusted between loop iterations.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
+    memcpy(cc->nmv_costs[i][0], cpi->nmv_costs[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][0]));
+    memcpy(cc->nmv_costs[i][1], cpi->nmv_costs[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs[i][1]));
+    memcpy(cc->nmv_costs_hp[i][0], cpi->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][0]));
+    memcpy(cc->nmv_costs_hp[i][1], cpi->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cpi->nmv_costs_hp[i][1]));
+  }
+#else
  vp10_copy(cc->nmvjointcost,  cpi->td.mb.nmvjointcost);
+#endif

  memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
         MV_VALS * sizeof(*cpi->nmvcosts[0]));
@ -440,10 +490,27 @@ static void save_coding_context(VP10_COMP *cpi) {
 static void restore_coding_context(VP10_COMP *cpi) {
  CODING_CONTEXT *const cc = &cpi->coding_context;
  VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+  int i;
+#endif

  // Restore key state variables to the snapshot state stored in the
  // previous call to vp10_save_coding_context.
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vp10_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
+    memcpy(cpi->nmv_costs[i][0], cc->nmv_costs[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs[i][0]));
+    memcpy(cpi->nmv_costs[i][1], cc->nmv_costs[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs[i][1]));
+    memcpy(cpi->nmv_costs_hp[i][0], cc->nmv_costs_hp[i][0],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][0]));
+    memcpy(cpi->nmv_costs_hp[i][1], cc->nmv_costs_hp[i][1],
+           MV_VALS * sizeof(*cc->nmv_costs_hp[i][1]));
+  }
+#else
  vp10_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
+#endif

  memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
  memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
@ -1640,6 +1707,19 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,

  realloc_segmentation_maps(cpi);

+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][1])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][0],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][0])));
+    CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][1],
+                    vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][1])));
+  }
+#endif
+
  CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
                  vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
  CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
@ -1715,14 +1795,23 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
  cpi->first_time_stamp_ever = INT64_MAX;

  cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
+    cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
+    cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
+  }
+#else
  cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
  cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
+  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
+  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
+#endif
  cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
  cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
  cal_nmvsadcosts(cpi->td.mb.nmvsadcost);

-  cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
-  cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
  cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
  cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
  cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@ -55,6 +55,12 @@ typedef struct {
  int nmvcosts[2][MV_VALS];
  int nmvcosts_hp[2][MV_VALS];

+#if CONFIG_REF_MV
+  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
+  int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
+#endif
+
  unsigned char *last_frame_seg_map_copy;

  // 0 = Intra, Last, GF, ARF
@ -352,6 +358,11 @@ typedef struct VP10_COMP {

  CODING_CONTEXT coding_context;

+#if CONFIG_REF_MV
+  int *nmv_costs[NMV_CONTEXTS][2];
+  int *nmv_costs_hp[NMV_CONTEXTS][2];
+#endif
+
  int *nmvcosts[2];
  int *nmvcosts_hp[2];
  int *nmvsadcosts[2];
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@ -331,6 +331,16 @@ static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
  }
 }

+#if CONFIG_REF_MV
+// Selects the MV cost tables for the NMV context of ref_frame's MV stack.
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame) {
+  const int ctx = vp10_nmv_ctx(x->mbmi_ext->ref_mv_count[ref_frame],
+                               x->mbmi_ext->ref_mv_stack[ref_frame]);
+  x->nmvjointcost = x->nmv_vec_cost[ctx];
+  x->mvcost = x->mv_cost_stack[ctx];
+}
+#endif
+
 void vp10_initialize_rd_consts(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
@ -362,12 +372,16 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {

  if (!frame_is_intra_only(cm)) {
 #if CONFIG_REF_MV
-    int nmv_ctx = 0;
-    vp10_build_nmv_cost_table(x->nmvjointcost,
-                             cm->allow_high_precision_mv ? x->nmvcost_hp
-                                                         : x->nmvcost,
-                             &cm->fc->nmvc[nmv_ctx],
-                             cm->allow_high_precision_mv);
+    int nmv_ctx;
+    for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+      vp10_build_nmv_cost_table(x->nmv_vec_cost[nmv_ctx],
+                                cm->allow_high_precision_mv ?
+                                  x->nmvcost_hp[nmv_ctx] : x->nmvcost[nmv_ctx],
+                                &cm->fc->nmvc[nmv_ctx],
+                                cm->allow_high_precision_mv);
+    }
+    x->mvcost = x->mv_cost_stack[0];
+    x->nmvjointcost = x->nmv_vec_cost[0];
 #else
    vp10_build_nmv_cost_table(x->nmvjointcost,
                             cm->allow_high_precision_mv ? x->nmvcost_hp
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@ -292,6 +292,10 @@ YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const struct VP10_COMP *cpi,

 void vp10_init_me_luts(void);

+#if CONFIG_REF_MV
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
+#endif
+
 void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@ -4011,6 +4011,10 @@ static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

+#if CONFIG_REF_MV
+    vp10_set_mvcost(x, refs[id]);
+#endif
+
    // Small-range full-pixel motion search.
    bestsme = vp10_refining_search_8p_c(x, &tmp_mv, sadpb,
                                       search_range,
@ -4308,6 +4312,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x,

          vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);

+#if CONFIG_REF_MV
+          vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif
          bestsme = vp10_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, sadpb,
              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
@ -4844,6 +4851,10 @@ static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to