VPX: removed step checks from mips convolve code

The check is handled by the predictor table. Change-Id: I5e5084ebb46be8087c8c9d80b5f76e919a1cd05b
2015-08-13 11:27:04 -07:00 · 2015-08-13 11:27:04 -07:00 · aeea00cc4f
--- a/vpx_dsp/mips/convolve2_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve2_avg_dspr2.c
@ -233,47 +233,41 @@ void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
                                  int w, int h) {
-  if (16 == y_step_q4) {
-    uint32_t pos = 38;
+  uint32_t pos = 38;

-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
+  assert(y_step_q4 == 16);

-    prefetch_store(dst);
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );

-    switch (w) {
-      case 4:
-      case 8:
-      case 16:
-      case 32:
-        convolve_bi_avg_vert_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_y, w, h);
-        break;
-      case 64:
-        prefetch_store(dst + 32);
-        convolve_bi_avg_vert_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_y, h);
-        break;
-      default:
-        vpx_convolve8_avg_vert_c(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_x, x_step_q4,
-                                 filter_y, y_step_q4,
-                                 w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_avg_vert_c(src, src_stride,
-                             dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4,
-                             w, h);
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+      convolve_bi_avg_vert_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_y, w, h);
+      break;
+    case 64:
+      prefetch_store(dst + 32);
+      convolve_bi_avg_vert_64_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_y, h);
+      break;
+    default:
+      vpx_convolve8_avg_vert_c(src, src_stride,
+                               dst, dst_stride,
+                               filter_x, x_step_q4,
+                               filter_y, y_step_q4,
+                               w, h);
+      break;
  }
 }
 #endif
--- a/vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
@ -768,64 +768,58 @@ void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
-  if (16 == x_step_q4) {
-    uint32_t pos = 38;
+  uint32_t pos = 38;

-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
+  assert(x_step_q4 == 16);

-    /* prefetch data to cache memory */
-    prefetch_load(src);
-    prefetch_load(src + 32);
-    prefetch_store(dst);
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );

-    switch (w) {
-      case 4:
-        convolve_bi_avg_horiz_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-        break;
-      case 8:
-        convolve_bi_avg_horiz_8_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-        break;
-      case 16:
-        convolve_bi_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 1);
-        break;
-      case 32:
-        convolve_bi_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 2);
-        break;
-      case 64:
-        prefetch_load(src + 64);
-        prefetch_store(dst + 32);
+  /* prefetch data to cache memory */
+  prefetch_load(src);
+  prefetch_load(src + 32);
+  prefetch_store(dst);

-        convolve_bi_avg_horiz_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h);
-        break;
-      default:
-        vpx_convolve8_avg_horiz_c(src, src_stride,
-                                  dst, dst_stride,
-                                  filter_x, x_step_q4,
-                                  filter_y, y_step_q4,
-                                  w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_avg_horiz_c(src, src_stride,
-                              dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4,
-                              w, h);
+  switch (w) {
+    case 4:
+      convolve_bi_avg_horiz_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+      break;
+    case 8:
+      convolve_bi_avg_horiz_8_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+      break;
+    case 16:
+      convolve_bi_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 1);
+      break;
+    case 32:
+      convolve_bi_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 2);
+      break;
+    case 64:
+      prefetch_load(src + 64);
+      prefetch_store(dst + 32);
+
+      convolve_bi_avg_horiz_64_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h);
+      break;
+    default:
+      vpx_convolve8_avg_horiz_c(src, src_stride,
+                                dst, dst_stride,
+                                filter_x, x_step_q4,
+                                filter_y, y_step_q4,
+                                w, h);
+      break;
  }
 }
 #endif
--- a/vpx_dsp/mips/convolve2_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve2_horiz_dspr2.c
@ -646,66 +646,60 @@ void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
-  if (16 == x_step_q4) {
-    uint32_t pos = 38;
+  uint32_t pos = 38;

-    prefetch_load((const uint8_t *)filter_x);
+  assert(x_step_q4 == 16);

-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
+  prefetch_load((const uint8_t *)filter_x);

-    /* prefetch data to cache memory */
-    prefetch_load(src);
-    prefetch_load(src + 32);
-    prefetch_store(dst);
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );

-    switch (w) {
-      case 4:
-        convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-        break;
-      case 8:
-        convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-        break;
-      case 16:
-        convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h, 1);
-        break;
-      case 32:
-        convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h, 2);
-        break;
-      case 64:
-        prefetch_load(src + 64);
-        prefetch_store(dst + 32);
+  /* prefetch data to cache memory */
+  prefetch_load(src);
+  prefetch_load(src + 32);
+  prefetch_store(dst);

-        convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
-                                   dst, (int32_t)dst_stride,
-                                   filter_x, (int32_t)h);
-        break;
-      default:
-        vpx_convolve8_horiz_c(src, src_stride,
-                              dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4,
-                              w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_horiz_c(src, src_stride,
-                          dst, dst_stride,
-                          filter_x, x_step_q4,
-                          filter_y, y_step_q4,
-                          w, h);
+  switch (w) {
+    case 4:
+      convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+      break;
+    case 8:
+      convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+      break;
+    case 16:
+      convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h, 1);
+      break;
+    case 32:
+      convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h, 2);
+      break;
+    case 64:
+      prefetch_load(src + 64);
+      prefetch_store(dst + 32);
+
+      convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filter_x, (int32_t)h);
+      break;
+    default:
+      vpx_convolve8_horiz_c(src, src_stride,
+                            dst, dst_stride,
+                            filter_x, x_step_q4,
+                            filter_y, y_step_q4,
+                            w, h);
+      break;
  }
 }
 #endif
--- a/vpx_dsp/mips/convolve2_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve2_vert_dspr2.c
@ -218,47 +218,41 @@ void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
-  if (16 == y_step_q4) {
-    uint32_t pos = 38;
+  uint32_t pos = 38;

-    /* bit positon for extract from acc */
-    __asm__ __volatile__ (
-      "wrdsp      %[pos],     1           \n\t"
-      :
-      : [pos] "r" (pos)
-    );
+  assert(y_step_q4 == 16);

-    prefetch_store(dst);
+  /* bit positon for extract from acc */
+  __asm__ __volatile__ (
+    "wrdsp      %[pos],     1           \n\t"
+    :
+    : [pos] "r" (pos)
+  );

-    switch (w) {
-      case 4 :
-      case 8 :
-      case 16 :
-      case 32 :
-        convolve_bi_vert_4_dspr2(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_y, w, h);
-        break;
-      case 64 :
-        prefetch_store(dst + 32);
-        convolve_bi_vert_64_dspr2(src, src_stride,
-                                  dst, dst_stride,
-                                  filter_y, h);
-        break;
-      default:
-        vpx_convolve8_vert_c(src, src_stride,
-                             dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4,
-                             w, h);
-        break;
-    }
-  } else {
-    vpx_convolve8_vert_c(src, src_stride,
-                         dst, dst_stride,
-                         filter_x, x_step_q4,
-                         filter_y, y_step_q4,
-                         w, h);
+  prefetch_store(dst);
+
+  switch (w) {
+    case 4 :
+    case 8 :
+    case 16 :
+    case 32 :
+      convolve_bi_vert_4_dspr2(src, src_stride,
+                               dst, dst_stride,
+                               filter_y, w, h);
+      break;
+    case 64 :
+      prefetch_store(dst + 32);
+      convolve_bi_vert_64_dspr2(src, src_stride,
+                                dst, dst_stride,
+                                filter_y, h);
+      break;
+    default:
+      vpx_convolve8_vert_c(src, src_stride,
+                           dst, dst_stride,
+                           filter_x, x_step_q4,
+                           filter_y, y_step_q4,
+                           w, h);
+      break;
  }
 }
 #endif
--- a/vpx_dsp/mips/convolve8_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_dspr2.c
@ -347,6 +347,7 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                  const int16_t *filter_x, int x_step_q4,
                                  const int16_t *filter_y, int y_step_q4,
                                  int w, int h) {
+  assert(y_step_q4 == 16);
  if (((const int32_t *)filter_y)[1] == 0x800000) {
    vpx_convolve_avg(src, src_stride,
                     dst, dst_stride,
@ -360,47 +361,39 @@ void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                 filter_y, y_step_q4,
                                 w, h);
  } else {
-    if (16 == y_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;

-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );

-      prefetch_store(dst);
+    prefetch_store(dst);

-      switch (w) {
-        case 4:
-        case 8:
-        case 16:
-        case 32:
-          convolve_avg_vert_4_dspr2(src, src_stride,
-                                    dst, dst_stride,
-                                    filter_y, w, h);
-          break;
-        case 64:
-          prefetch_store(dst + 32);
-          convolve_avg_vert_64_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_y, h);
-          break;
-        default:
-          vpx_convolve8_avg_vert_c(src, src_stride,
+    switch (w) {
+      case 4:
+      case 8:
+      case 16:
+      case 32:
+        convolve_avg_vert_4_dspr2(src, src_stride,
+                                  dst, dst_stride,
+                                  filter_y, w, h);
+        break;
+      case 64:
+        prefetch_store(dst + 32);
+        convolve_avg_vert_64_dspr2(src, src_stride,
                                   dst, dst_stride,
-                                   filter_x, x_step_q4,
-                                   filter_y, y_step_q4,
-                                   w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_avg_vert_c(src, src_stride,
-                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
+                                   filter_y, h);
+        break;
+      default:
+        vpx_convolve8_avg_vert_c(src, src_stride,
+                                 dst, dst_stride,
+                                 filter_x, x_step_q4,
+                                 filter_y, y_step_q4,
+                                 w, h);
+        break;
    }
  }
 }
@ -416,17 +409,12 @@ void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,

  assert(w <= 64);
  assert(h <= 64);
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);

  if (intermediate_height < h)
    intermediate_height = h;

-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vpx_convolve8_avg_c(src, src_stride,
-                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
-
  vpx_convolve8_horiz(src - (src_stride * 3), src_stride,
                      temp, 64,
                      filter_x, x_step_q4,
--- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
@ -957,6 +957,7 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   const int16_t *filter_x, int x_step_q4,
                                   const int16_t *filter_y, int y_step_q4,
                                   int w, int h) {
+  assert(x_step_q4 == 16);
  if (((const int32_t *)filter_x)[1] == 0x800000) {
    vpx_convolve_avg(src, src_stride,
                     dst, dst_stride,
@ -970,66 +971,58 @@ void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                  filter_y, y_step_q4,
                                  w, h);
  } else {
-    if (16 == x_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;

-      src -= 3;
+    src -= 3;

-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );

-      /* prefetch data to cache memory */
-      prefetch_load(src);
-      prefetch_load(src + 32);
-      prefetch_store(dst);
+    /* prefetch data to cache memory */
+    prefetch_load(src);
+    prefetch_load(src + 32);
+    prefetch_store(dst);

-      switch (w) {
-        case 4:
-          convolve_avg_horiz_4_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-          break;
-        case 8:
-          convolve_avg_horiz_8_dspr2(src, src_stride,
-                                     dst, dst_stride,
-                                     filter_x, h);
-          break;
-        case 16:
-          convolve_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 1);
-          break;
-        case 32:
-          convolve_avg_horiz_16_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h, 2);
-          break;
-        case 64:
-          prefetch_load(src + 64);
-          prefetch_store(dst + 32);
-
-          convolve_avg_horiz_64_dspr2(src, src_stride,
-                                      dst, dst_stride,
-                                      filter_x, h);
-          break;
-        default:
-          vpx_convolve8_avg_horiz_c(src + 3, src_stride,
+    switch (w) {
+      case 4:
+        convolve_avg_horiz_4_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+        break;
+      case 8:
+        convolve_avg_horiz_8_dspr2(src, src_stride,
+                                   dst, dst_stride,
+                                   filter_x, h);
+        break;
+      case 16:
+        convolve_avg_horiz_16_dspr2(src, src_stride,
                                    dst, dst_stride,
-                                    filter_x, x_step_q4,
-                                    filter_y, y_step_q4,
-                                    w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_avg_horiz_c(src, src_stride,
-                                dst, dst_stride,
-                                filter_x, x_step_q4,
-                                filter_y, y_step_q4,
-                                w, h);
+                                    filter_x, h, 1);
+        break;
+      case 32:
+        convolve_avg_horiz_16_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h, 2);
+        break;
+      case 64:
+        prefetch_load(src + 64);
+        prefetch_store(dst + 32);
+
+        convolve_avg_horiz_64_dspr2(src, src_stride,
+                                    dst, dst_stride,
+                                    filter_x, h);
+        break;
+      default:
+        vpx_convolve8_avg_horiz_c(src + 3, src_stride,
+                                  dst, dst_stride,
+                                  filter_x, x_step_q4,
+                                  filter_y, y_step_q4,
+                                  w, h);
+        break;
    }
  }
 }
--- a/vpx_dsp/mips/convolve8_dspr2.c
+++ b/vpx_dsp/mips/convolve8_dspr2.c
@ -936,6 +936,9 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
  int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
  uint32_t pos = 38;

+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);
+
  /* bit positon for extract from acc */
  __asm__ __volatile__ (
    "wrdsp      %[pos],     1           \n\t"
@ -946,13 +949,6 @@ void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride,
  if (intermediate_height < h)
    intermediate_height = h;

-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vpx_convolve8_c(src, src_stride,
-                           dst, dst_stride,
-                           filter_x, x_step_q4,
-                           filter_y, y_step_q4,
-                           w, h);
-
  if ((((const int32_t *)filter_x)[1] == 0x800000)
      && (((const int32_t *)filter_y)[1] == 0x800000))
    return vpx_convolve_copy(src, src_stride,
--- a/vpx_dsp/mips/convolve8_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c
@ -841,6 +841,7 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
+  assert(x_step_q4 == 16);
  if (((const int32_t *)filter_x)[1] == 0x800000) {
    vpx_convolve_copy(src, src_stride,
                      dst, dst_stride,
@ -854,67 +855,59 @@ void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              filter_y, y_step_q4,
                              w, h);
  } else {
-    if (16 == x_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;

-      prefetch_load((const uint8_t *)filter_x);
-      src -= 3;
+    prefetch_load((const uint8_t *)filter_x);
+    src -= 3;

-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );

-      /* prefetch data to cache memory */
-      prefetch_load(src);
-      prefetch_load(src + 32);
-      prefetch_store(dst);
+    /* prefetch data to cache memory */
+    prefetch_load(src);
+    prefetch_load(src + 32);
+    prefetch_store(dst);

-      switch (w) {
-        case 4:
-          convolve_horiz_4_dspr2(src, (int32_t)src_stride,
-                                 dst, (int32_t)dst_stride,
-                                 filter_x, (int32_t)h);
-          break;
-        case 8:
-          convolve_horiz_8_dspr2(src, (int32_t)src_stride,
-                                 dst, (int32_t)dst_stride,
-                                 filter_x, (int32_t)h);
-          break;
-        case 16:
-          convolve_horiz_16_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h, 1);
-          break;
-        case 32:
-          convolve_horiz_16_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h, 2);
-          break;
-        case 64:
-          prefetch_load(src + 64);
-          prefetch_store(dst + 32);
+    switch (w) {
+      case 4:
+        convolve_horiz_4_dspr2(src, (int32_t)src_stride,
+                               dst, (int32_t)dst_stride,
+                               filter_x, (int32_t)h);
+        break;
+      case 8:
+        convolve_horiz_8_dspr2(src, (int32_t)src_stride,
+                               dst, (int32_t)dst_stride,
+                               filter_x, (int32_t)h);
+        break;
+      case 16:
+        convolve_horiz_16_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h, 1);
+        break;
+      case 32:
+        convolve_horiz_16_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h, 2);
+        break;
+      case 64:
+        prefetch_load(src + 64);
+        prefetch_store(dst + 32);

-          convolve_horiz_64_dspr2(src, (int32_t)src_stride,
-                                  dst, (int32_t)dst_stride,
-                                  filter_x, (int32_t)h);
-          break;
-        default:
-          vpx_convolve8_horiz_c(src + 3, src_stride,
-                                dst, dst_stride,
-                                filter_x, x_step_q4,
-                                filter_y, y_step_q4,
-                                w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_horiz_c(src, src_stride,
-                            dst, dst_stride,
-                            filter_x, x_step_q4,
-                            filter_y, y_step_q4,
-                            w, h);
+        convolve_horiz_64_dspr2(src, (int32_t)src_stride,
+                                dst, (int32_t)dst_stride,
+                                filter_x, (int32_t)h);
+        break;
+      default:
+        vpx_convolve8_horiz_c(src + 3, src_stride,
+                              dst, dst_stride,
+                              filter_x, x_step_q4,
+                              filter_y, y_step_q4,
+                              w, h);
+        break;
    }
  }
 }
--- a/vpx_dsp/mips/convolve8_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve8_vert_dspr2.c
@ -333,6 +333,7 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
+  assert(y_step_q4 == 16);
  if (((const int32_t *)filter_y)[1] == 0x800000) {
    vpx_convolve_copy(src, src_stride,
                      dst, dst_stride,
@ -346,47 +347,39 @@ void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                             filter_y, y_step_q4,
                             w, h);
  } else {
-    if (16 == y_step_q4) {
-      uint32_t pos = 38;
+    uint32_t pos = 38;

-      /* bit positon for extract from acc */
-      __asm__ __volatile__ (
-        "wrdsp      %[pos],     1           \n\t"
-        :
-        : [pos] "r" (pos)
-      );
+    /* bit positon for extract from acc */
+    __asm__ __volatile__ (
+      "wrdsp      %[pos],     1           \n\t"
+      :
+      : [pos] "r" (pos)
+    );

-      prefetch_store(dst);
+    prefetch_store(dst);

-      switch (w) {
-        case 4 :
-        case 8 :
-        case 16 :
-        case 32 :
-          convolve_vert_4_dspr2(src, src_stride,
-                                dst, dst_stride,
-                                filter_y, w, h);
-          break;
-        case 64 :
-          prefetch_store(dst + 32);
-          convolve_vert_64_dspr2(src, src_stride,
-                                 dst, dst_stride,
-                                 filter_y, h);
-          break;
-        default:
-          vpx_convolve8_vert_c(src, src_stride,
+    switch (w) {
+      case 4 :
+      case 8 :
+      case 16 :
+      case 32 :
+        convolve_vert_4_dspr2(src, src_stride,
+                              dst, dst_stride,
+                              filter_y, w, h);
+        break;
+      case 64 :
+        prefetch_store(dst + 32);
+        convolve_vert_64_dspr2(src, src_stride,
                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
-          break;
-      }
-    } else {
-      vpx_convolve8_vert_c(src, src_stride,
-                           dst, dst_stride,
-                           filter_x, x_step_q4,
-                           filter_y, y_step_q4,
-                           w, h);
+                               filter_y, h);
+        break;
+      default:
+        vpx_convolve8_vert_c(src, src_stride,
+                             dst, dst_stride,
+                             filter_x, x_step_q4,
+                             filter_y, y_step_q4,
+                             w, h);
+        break;
    }
  }
 }
--- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -665,12 +666,7 @@ void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                                 int w, int h) {
  int8_t cnt, filt_hor[8];

-  if (16 != x_step_q4) {
-    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);

  if (((const int32_t *)filter_x)[1] == 0x800000) {
    vpx_convolve_avg(src, src_stride, dst, dst_stride,
--- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -574,12 +575,8 @@ void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
                           int w, int h) {
  int8_t cnt, filt_hor[8], filt_ver[8];

-  if (16 != x_step_q4 || 16 != y_step_q4) {
-    vpx_convolve8_avg_c(src, src_stride, dst, dst_stride,
-                        filter_x, x_step_q4, filter_y, y_step_q4,
-                        w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);

  if (((const int32_t *)filter_x)[1] == 0x800000 &&
      ((const int32_t *)filter_y)[1] == 0x800000) {
--- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -639,12 +640,7 @@ void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                                int w, int h) {
  int8_t cnt, filt_ver[8];

-  if (16 != y_step_q4) {
-    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4, filter_y, y_step_q4,
-                             w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);

  if (((const int32_t *)filter_y)[1] == 0x800000) {
    vpx_convolve_avg(src, src_stride, dst, dst_stride,
--- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -625,12 +626,7 @@ void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
                             int w, int h) {
  int8_t cnt, filt_hor[8];

-  if (16 != x_step_q4) {
-    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                          filter_x, x_step_q4, filter_y, y_step_q4,
-                          w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);

  if (((const int32_t *)filter_x)[1] == 0x800000) {
    vpx_convolve_copy(src, src_stride, dst, dst_stride,
--- a/vpx_dsp/mips/vpx_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -548,12 +549,8 @@ void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
                       int32_t w, int32_t h) {
  int8_t cnt, filt_hor[8], filt_ver[8];

-  if (16 != x_step_q4 || 16 != y_step_q4) {
-    vpx_convolve8_c(src, src_stride, dst, dst_stride,
-                    filter_x, x_step_q4, filter_y, y_step_q4,
-                    w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
+  assert(y_step_q4 == 16);

  if (((const int32_t *)filter_x)[1] == 0x800000 &&
      ((const int32_t *)filter_y)[1] == 0x800000) {
--- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <assert.h>
 #include "./vpx_dsp_rtcd.h"
 #include "vpx_dsp/mips/vpx_convolve_msa.h"

@ -632,12 +633,7 @@ void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
                            int w, int h) {
  int8_t cnt, filt_ver[8];

-  if (16 != y_step_q4) {
-    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                         filter_x, x_step_q4, filter_y, y_step_q4,
-                         w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);

  if (((const int32_t *)filter_y)[1] == 0x800000) {
    vpx_convolve_copy(src, src_stride, dst, dst_stride,