SMOOTH_PRED: Use get_msb() to get log2 of block dimension.

Apart from being inefficient, the floating-point call log2() was causing an assertion failure because of an unrelated floating-point exception raised earlier. Related: update the MD5s in test_intra_pred_speed to fix that failure too. BUG=aomedia:384 Change-Id: I18dc0733e880bac21b3d07ad874f8ae341f59f06
2017-03-09 13:19:03 -08:00 · 2017-03-09 13:19:03 -08:00 · ee7ee7f49f
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@ -17,6 +17,7 @@

 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_mem/aom_mem.h"
+#include "aom_ports/bitops.h"

 #define DST(x, y) dst[(x) + (y)*stride]
 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
@ -265,10 +266,15 @@ static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
 static const int sm_weight_log2_scale = 8;

 #if CONFIG_TX64X64
-static const uint8_t sm_weight_arrays[6][64] = {
+// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
+#define MAX_BLOCK_DIM 64
+#define NUM_BLOCK_DIMS 6  // log2(MAX_BLOCK_DIM)
 #else
-static const uint8_t sm_weight_arrays[5][32] = {
+#define MAX_BLOCK_DIM 32
+#define NUM_BLOCK_DIMS 5
 #endif  // CONFIG_TX64X64
+
+static const uint8_t sm_weight_arrays[NUM_BLOCK_DIMS][MAX_BLOCK_DIM] = {
  // bs = 2
  { 255, 128 },
  // bs = 4
@ -304,7 +310,9 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                    const uint8_t *above, const uint8_t *left) {
  const uint8_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
  const uint8_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = (int)lround(log2(bs)) - 1;
+  const int arr_index = get_msb(bs) - 1;
+  assert(arr_index >= 0);
+  assert(arr_index < NUM_BLOCK_DIMS);
  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
  // scale = 2 * 2^sm_weight_log2_scale
  const int log2_scale = 1 + sm_weight_log2_scale;
@ -1037,7 +1045,9 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
                                           const uint16_t *left, int bd) {
  const uint16_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
  const uint16_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = (int)lround(log2(bs)) - 1;
+  const int arr_index = get_msb(bs) - 1;
+  assert(arr_index >= 0);
+  assert(arr_index < NUM_BLOCK_DIMS);
  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
  // scale = 2 * 2^sm_weight_log2_scale
  const int log2_scale = 1 + sm_weight_log2_scale;
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@ -104,7 +104,7 @@ void TestIntraPred4(AvxPredFunc const *pred_funcs) {
    "c0889e2039bcf7bcb5d2f33cdca69adc",
 #if CONFIG_ALT_INTRA
    "828c49a4248993cce4876fa26eab697f",
-    "c106e0dc44de3d33c62b7bc0bc63c550"
+    "718c8cee9011f92ef31f77a9a7560010"
 #else
    "309a618577b27c648f9c5ee45252bc8f",
 #endif  // CONFIG_ALT_INTRA
@ -129,7 +129,7 @@ void TestIntraPred8(AvxPredFunc const *pred_funcs) {
    "95f7bfc262329a5849eda66d8f7c68ce",
 #if CONFIG_ALT_INTRA
    "f6ade499c626d38eb70661184b79bc57",
-    "f9217748b7188479c2990e42d2dc1da1"
+    "1ad5b106c79b792e514ba25e87139b5e"
 #else
    "815b75c8e0d91cc1ae766dc5d3e445a3",
 #endif  // CONFIG_ALT_INTRA
@ -154,7 +154,7 @@ void TestIntraPred16(AvxPredFunc const *pred_funcs) {
    "a8fe1c70432f09d0c20c67bdb6432c4d",
 #if CONFIG_ALT_INTRA
    "7adcaaa3554eb71a81fc48cb9043984b",
-    "de44142b9670ab7c85d4c318c47257e5"
+    "c0acea4397c1b4d54a21bbcec5731dff"
 #else
    "b8a41aa968ec108af447af4217cba91b",
 #endif  // CONFIG_ALT_INTRA
@ -179,7 +179,7 @@ void TestIntraPred32(AvxPredFunc const *pred_funcs) {
    "f162b51ed618d28b936974cff4391da5",
 #if CONFIG_ALT_INTRA
    "297e8fbb5d33c29b12b228fa9d7c40a4",
-    "a08d5b7e104c5fc2b203789ee5f725a7"
+    "31b9296d70dd82238c87173e6d5e65fd"
 #else
    "9e1370c6d42e08d357d9612c93a71cfc",
 #endif  // CONFIG_ALT_INTRA