adapt_scan experiment

Performance improvement (BD-rate): lowres 0.921%, midres 0.730%, hdres 1.019%.

Change-Id: I26208d6c0531937bff44de505b4ea355c7852802
2016-10-21 16:44:47 -07:00 · 2016-10-21 16:44:47 -07:00 · ed8cd9a9b4
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@ -12,6 +12,7 @@
 #include "av1/common/entropy.h"
 #include "av1/common/blockd.h"
 #include "av1/common/onyxc_int.h"
+#include "av1/common/scan.h"
 #include "av1/common/entropymode.h"
 #include "aom_mem/aom_mem.h"
 #include "aom/aom_integer.h"
@ -2842,6 +2843,10 @@ void av1_default_coef_probs(AV1_COMMON *cm) {
 #endif  // CONFIG_RANS
 }

+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
                             unsigned int count_sat,
                             unsigned int update_factor) {
@ -2881,9 +2886,13 @@ static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
 }

 void av1_adapt_coef_probs(AV1_COMMON *cm) {
-  TX_SIZE t;
+  TX_SIZE tx_size;
  unsigned int count_sat, update_factor;

+#if CONFIG_ADAPT_SCAN
+  TX_TYPE tx_type;
+#endif
+
 #if CONFIG_ENTROPY
  if (cm->last_frame_type == KEY_FRAME) {
    update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */
@ -2904,11 +2913,21 @@ void av1_adapt_coef_probs(AV1_COMMON *cm) {
    count_sat = COEF_COUNT_SAT;
  }
 #endif  // CONFIG_ENTROPY
-  for (t = TX_4X4; t <= TX_32X32; t++)
-    adapt_coef_probs(cm, t, count_sat, update_factor);
+  for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+    adapt_coef_probs(cm, tx_size, count_sat, update_factor);
 #if CONFIG_RANS
  av1_coef_pareto_cdfs(cm->fc);
 #endif  // CONFIG_RANS
+
+#if CONFIG_ADAPT_SCAN
+  // Update the scan probabilities, then the scan order, for every
+  // (tx_size, tx_type) pair. tx_type starts at DCT_DCT, not a TX_SIZE value.
+  for (tx_size = TX_4X4; tx_size < TX_SIZES; ++tx_size)
+    for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+      av1_update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE_16);
+      av1_update_scan_order_facade(cm, tx_size, tx_type);
+    }
+#endif
 }

 #if CONFIG_ENTROPY
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@ -288,6 +288,10 @@ void av1_coef_pareto_cdfs(struct frame_contexts *fc);

 #endif  // CONFIG_ENTROPY

+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static INLINE aom_prob av1_merge_probs(aom_prob pre_prob,
                                       const unsigned int ct[2],
                                       unsigned int count_sat,
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@ -12,6 +12,7 @@
 #include "aom_mem/aom_mem.h"

 #include "av1/common/reconinter.h"
+#include "av1/common/scan.h"
 #include "av1/common/onyxc_int.h"
 #include "av1/common/seg_common.h"

@ -1755,6 +1756,9 @@ void av1_setup_past_independence(AV1_COMMON *cm) {
  av1_default_coef_probs(cm);
  init_mode_probs(cm->fc);
  av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
  cm->fc->initialized = 1;

  if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@ -837,8 +837,10 @@ void av1_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
  if (eob == 1)
    // DC only DCT coefficient
    aom_idct8x8_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
  else if (eob <= 12)
    aom_idct8x8_12_add(input, dest, stride);
+#endif
  else
    aom_idct8x8_64_add(input, dest, stride);
 }
@ -849,19 +851,22 @@ void av1_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
   * coefficients. Use eobs to separate different cases. */
  if (eob == 1) /* DC only DCT coefficient. */
    aom_idct16x16_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
  else if (eob <= 10)
    aom_idct16x16_10_add(input, dest, stride);
+#endif
  else
    aom_idct16x16_256_add(input, dest, stride);
 }

 void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                       int eob) {
-  if (eob == 1)
-    aom_idct32x32_1_add(input, dest, stride);
+  if (eob == 1) aom_idct32x32_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
  else if (eob <= 34)
    // non-zero coeff only in upper-left 8x8
    aom_idct32x32_34_add(input, dest, stride);
+#endif
  else
    aom_idct32x32_1024_add(input, dest, stride);
 }
@ -1659,13 +1664,13 @@ void av1_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
  // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
  // Combine that with code here.
  // DC only DCT coefficient
-  if (eob == 1) {
-    aom_highbd_idct8x8_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct8x8_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
    aom_highbd_idct8x8_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
    aom_highbd_idct8x8_64_add(input, dest, stride, bd);
-  }
 }

 void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
@ -1673,25 +1678,25 @@ void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
  // The calculation can be simplified if there are not many non-zero dct
  // coefficients. Use eobs to separate different cases.
  // DC only DCT coefficient.
-  if (eob == 1) {
-    aom_highbd_idct16x16_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct16x16_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
    aom_highbd_idct16x16_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
    aom_highbd_idct16x16_256_add(input, dest, stride, bd);
-  }
 }

 void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                              int stride, int eob, int bd) {
  // Non-zero coeff only in upper-left 8x8
-  if (eob == 1) {
-    aom_highbd_idct32x32_1_add(input, dest, stride, bd);
-  } else if (eob <= 34) {
+  if (eob == 1) aom_highbd_idct32x32_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 34)
    aom_highbd_idct32x32_34_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
    aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
-  }
 }

 void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@ -82,6 +82,10 @@ static INLINE const SCAN_ORDER *get_inter_scan(TX_SIZE tx_size,

 static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
                                         TX_TYPE tx_type, int is_inter) {
+#if CONFIG_ADAPT_SCAN
+  (void)is_inter;
+  return &cm->fc->sc[tx_size][tx_type];
+#else  // CONFIG_ADAPT_SCAN
  (void)cm;
 #if CONFIG_EXT_TX
  return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
@ -90,6 +94,7 @@ static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
  (void)is_inter;
  return &av1_intra_scan_orders[tx_size][tx_type];
 #endif  // CONFIG_EXT_TX
+#endif  // CONFIG_ADAPT_SCAN
 }

 #ifdef __cplusplus
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@ -303,6 +303,9 @@ static void predict_and_reconstruct_intra_block(AV1_COMMON *cm,
    const int eob =
        av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size,
                                tx_type, &max_scan_line, r, mbmi->segment_id);
+#if CONFIG_ADAPT_SCAN
+    av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
    if (eob)
      inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
                              max_scan_line, eob);
@ -385,6 +388,9 @@ static int reconstruct_inter_block(AV1_COMMON *cm, MACROBLOCKD *const xd,
  const int eob =
      av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, tx_type,
                              &max_scan_line, r, segment_id);
+#if CONFIG_ADAPT_SCAN
+  av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
  if (eob)
    inverse_transform_block(xd, plane, tx_type, tx_size,
                            &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@ -535,6 +535,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
  }

  av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
  av1_initialize_rd_consts(cpi);

  // Tiling is ignored in the first pass.
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@ -534,6 +534,14 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,

  *tp = t;

+#if CONFIG_ADAPT_SCAN
+  // Since dqcoeff is not available here, we pass qcoeff into
+  // av1_update_scan_count_facade(). The update behavior should be the same
+  // because av1_update_scan_count_facade() only cares if coefficients are zero
+  // or not.
+  av1_update_scan_count_facade((AV1_COMMON *)cm, tx_size, tx_type, qcoeff, c);
+#endif
+
  av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
 }

--- a/test/av1_inv_txfm_test.cc
+++ b/test/av1_inv_txfm_test.cc
@ -137,6 +137,7 @@ class AV1PartialIDctTest
  InvTxfmFunc partial_itxfm_;
 };

+#if !CONFIG_ADAPT_SCAN
 TEST_P(AV1PartialIDctTest, RunQuantCheck) {
  int size;
  switch (tx_size_) {
@ -256,6 +257,7 @@ TEST_P(AV1PartialIDctTest, ResultsMatch) {
  EXPECT_EQ(0, max_error)
      << "Error: partial inverse transform produces different results";
 }
+#endif
 using std::tr1::make_tuple;

 INSTANTIATE_TEST_CASE_P(
--- a/test/test.mk
+++ b/test/test.mk
@ -113,6 +113,7 @@ LIBAOM_TEST_SRCS-yes                   += divu_small_test.cc
 #LIBAOM_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
 endif

+LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN)  += scan_test.cc
 #LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
 LIBAOM_TEST_SRCS-yes                   += lpf_8_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_CLPF)        += clpf_test.cc