SMOOTH_PRED: Use 8-bit weights for real.

Use 255 instead of 256, to restrict to 8-bits. Only noise level differences in performance. AWCY: High Latency Low Latency All Keyframes -0.01 -0.01 Video overall -0.01 -0.07 Google Set: All KF Video lowres -0.005 -0.029 midres -0.008 0.028 hdres -0.010 -0.022 Note: By moving from 18-bit to 8-bit and then cutting off at 255 (this change, overall effect is noise level too (neutral or better). Change-Id: I9f2852023015e36c01203bafe486ec400b2ba46f
2017-03-02 17:10:44 -08:00 · 2017-03-02 17:10:44 -08:00 · 4d5bbbd907
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@ -265,26 +265,26 @@ static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
 static const int sm_weight_log2_scale = 8;

 #if CONFIG_TX64X64
-static const uint32_t sm_weight_arrays[6][64] = {
+static const uint8_t sm_weight_arrays[6][64] = {
 #else
-static const uint32_t sm_weight_arrays[5][32] = {
+static const uint8_t sm_weight_arrays[5][32] = {
 #endif  // CONFIG_TX64X64
  // bs = 2
-  { 256, 128 },
+  { 255, 128 },
  // bs = 4
-  { 256, 149, 85, 64 },
+  { 255, 149, 85, 64 },
  // bs = 8
-  { 256, 197, 146, 105, 73, 50, 37, 32 },
+  { 255, 197, 146, 105, 73, 50, 37, 32 },
  // bs = 16
-  { 256, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16 },
+  { 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16 },
  // bs = 32
  {
-      256, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122,
+      255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122,
      111, 101, 92,  83,  74,  66,  59,  52,  45,  39,  34,
      29,  25,  21,  17,  14,  12,  10,  9,   8,   8 },
 #if CONFIG_TX64X64
  // bs = 64
-  { 256, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169,
+  { 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169,
    163, 156, 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96,
    91,  86,  82,  77,  73,  69,  65,  61,  57,  54,  50,  47,  44,
    41,  38,  35,  32,  29,  27,  25,  22,  20,  18,  16,  15,  13,
@ -292,6 +292,12 @@ static const uint32_t sm_weight_arrays[5][32] = {
 #endif  // CONFIG_TX64X64
 };

+// Some basic checks on weights for smooth predictor.
+#define sm_weights_sanity_checks(weights, weights_scale, pred_scale) \
+  assert(weights[0] < weights_scale);                                \
+  assert(weights_scale - weights[bs - 1] < weights_scale);           \
+  assert(pred_scale < 31)  // ensures no overflow when calculating predictor.
+
 #define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))

 static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
@ -299,18 +305,17 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
  const uint8_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
  const uint8_t right_pred = above[bs - 1];  // estimated by top-right pixel
  const int arr_index = (int)lround(log2(bs)) - 1;
-  const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
  // scale = 2 * 2^sm_weight_log2_scale
  const int log2_scale = 1 + sm_weight_log2_scale;
-  assert(log2_scale + 8 < 8 * 31);  // sanity check: no overflow.
-  const uint32_t scale = sm_weights[0];
-  assert((int)scale == (1 << sm_weight_log2_scale));
+  const uint16_t scale = (1 << sm_weight_log2_scale);
+  sm_weights_sanity_checks(sm_weights, scale, log2_scale + sizeof(*dst));
  int r;
  for (r = 0; r < bs; ++r) {
    int c;
    for (c = 0; c < bs; ++c) {
      const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
-      const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
+      const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r],
                                  sm_weights[c], scale - sm_weights[c] };
      uint32_t this_pred = 0;
      int i;
@ -1033,18 +1038,17 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
  const uint16_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
  const uint16_t right_pred = above[bs - 1];  // estimated by top-right pixel
  const int arr_index = (int)lround(log2(bs)) - 1;
-  const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
  // scale = 2 * 2^sm_weight_log2_scale
  const int log2_scale = 1 + sm_weight_log2_scale;
-  assert(log2_scale + 8 < 8 * 31);  // sanity check: no overflow.
-  const uint32_t scale = sm_weights[0];
-  assert((int)scale == (1 << sm_weight_log2_scale));
+  const uint16_t scale = (1 << sm_weight_log2_scale);
+  sm_weights_sanity_checks(sm_weights, scale, log2_scale + sizeof(*dst));
  int r;
  for (r = 0; r < bs; ++r) {
    int c;
    for (c = 0; c < bs; ++c) {
      const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
-      const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
+      const uint8_t weights[] = { sm_weights[r], scale - sm_weights[r],
                                  sm_weights[c], scale - sm_weights[c] };
      uint32_t this_pred = 0;
      int i;